{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>LicenseNo</th>\n",
       "      <th>FacilityID</th>\n",
       "      <th>FacilityName</th>\n",
       "      <th>Type</th>\n",
       "      <th>Street</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>LocationID</th>\n",
       "      <th>Reason</th>\n",
       "      <th>SectionViolations</th>\n",
       "      <th>RiskLevel</th>\n",
       "      <th>Geo_Loc</th>\n",
       "      <th>Inspection_Results</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>31103489027986</td>\n",
       "      <td>26-04-2010</td>\n",
       "      <td>4744</td>\n",
       "      <td>8123</td>\n",
       "      <td>7715</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>15522</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81876.0</td>\n",
       "      <td>CANVASS</td>\n",
       "      <td>33.0</td>\n",
       "      <td>High</td>\n",
       "      <td>locid16406</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10088999935915</td>\n",
       "      <td>21-06-2009</td>\n",
       "      <td>2973</td>\n",
       "      <td>12268</td>\n",
       "      <td>11664</td>\n",
       "      <td>GROCERY STORE</td>\n",
       "      <td>3057</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81862.0</td>\n",
       "      <td>COMPLAINT</td>\n",
       "      <td>33.0</td>\n",
       "      <td>High</td>\n",
       "      <td>locid878</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>40148966010272</td>\n",
       "      <td>01-05-2013</td>\n",
       "      <td>18223</td>\n",
       "      <td>1112</td>\n",
       "      <td>969</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>14988</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81883.0</td>\n",
       "      <td>CANVASS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>High</td>\n",
       "      <td>locid3368</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>37157708563676</td>\n",
       "      <td>28-09-2015</td>\n",
       "      <td>20825</td>\n",
       "      <td>20007</td>\n",
       "      <td>19115</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>3661</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81859.0</td>\n",
       "      <td>CANVASS RE-INSPECTION</td>\n",
       "      <td>31.0</td>\n",
       "      <td>Medium</td>\n",
       "      <td>locid11839</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>47478049564374</td>\n",
       "      <td>09-12-2015</td>\n",
       "      <td>2136</td>\n",
       "      <td>16867</td>\n",
       "      <td>10409</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>7876</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81886.0</td>\n",
       "      <td>COMPLAINT</td>\n",
       "      <td>30.0</td>\n",
       "      <td>High</td>\n",
       "      <td>locid12264</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               ID        Date  LicenseNo  FacilityID  FacilityName  \\\n",
       "0  31103489027986  26-04-2010       4744        8123          7715   \n",
       "1  10088999935915  21-06-2009       2973       12268         11664   \n",
       "2  40148966010272  01-05-2013      18223        1112           969   \n",
       "3  37157708563676  28-09-2015      20825       20007         19115   \n",
       "4  47478049564374  09-12-2015       2136       16867         10409   \n",
       "\n",
       "            Type  Street         City       State  LocationID  \\\n",
       "0     RESTAURANT   15522  id-11235901  id_1890134     81876.0   \n",
       "1  GROCERY STORE    3057  id-11235901  id_1890134     81862.0   \n",
       "2     RESTAURANT   14988  id-11235901  id_1890134     81883.0   \n",
       "3     RESTAURANT    3661  id-11235901  id_1890134     81859.0   \n",
       "4     RESTAURANT    7876  id-11235901  id_1890134     81886.0   \n",
       "\n",
       "                  Reason  SectionViolations RiskLevel     Geo_Loc  \\\n",
       "0                CANVASS               33.0      High  locid16406   \n",
       "1              COMPLAINT               33.0      High    locid878   \n",
       "2                CANVASS                NaN      High   locid3368   \n",
       "3  CANVASS RE-INSPECTION               31.0    Medium  locid11839   \n",
       "4              COMPLAINT               30.0      High  locid12264   \n",
       "\n",
       "   Inspection_Results  \n",
       "0                   4  \n",
       "1                   4  \n",
       "2                   6  \n",
       "3                   4  \n",
       "4                   4  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = pd.read_excel('data/Data_Train.xlsx')\n",
    "train[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ID                    147443\n",
       "Date                    2507\n",
       "LicenseNo              34384\n",
       "FacilityID             25535\n",
       "FacilityName           24348\n",
       "Type                     409\n",
       "Street                 17290\n",
       "City                       2\n",
       "State                      2\n",
       "LocationID               104\n",
       "Reason                    17\n",
       "SectionViolations         61\n",
       "RiskLevel                  4\n",
       "Geo_Loc                16316\n",
       "Inspection_Results         7\n",
       "dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>LicenseNo</th>\n",
       "      <th>FacilityID</th>\n",
       "      <th>FacilityName</th>\n",
       "      <th>Type</th>\n",
       "      <th>Street</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>LocationID</th>\n",
       "      <th>Reason</th>\n",
       "      <th>SectionViolations</th>\n",
       "      <th>RiskLevel</th>\n",
       "      <th>Geo_Loc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47571900570810</td>\n",
       "      <td>19-03-2014</td>\n",
       "      <td>34218</td>\n",
       "      <td>2354</td>\n",
       "      <td>2185</td>\n",
       "      <td>GROCERY STORE</td>\n",
       "      <td>17352</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81864.0</td>\n",
       "      <td>CANVASS</td>\n",
       "      <td>19.0</td>\n",
       "      <td>Low</td>\n",
       "      <td>locid1145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19685766253655</td>\n",
       "      <td>22-11-2010</td>\n",
       "      <td>24597</td>\n",
       "      <td>13183</td>\n",
       "      <td>12516</td>\n",
       "      <td>SCHOOL</td>\n",
       "      <td>15536</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81871.0</td>\n",
       "      <td>CANVASS</td>\n",
       "      <td>32.0</td>\n",
       "      <td>High</td>\n",
       "      <td>locid16450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37885701847801</td>\n",
       "      <td>09-06-2014</td>\n",
       "      <td>1779</td>\n",
       "      <td>26434</td>\n",
       "      <td>25220</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>8849</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81902.0</td>\n",
       "      <td>CANVASS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>High</td>\n",
       "      <td>locid12916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>39946284936679</td>\n",
       "      <td>07-04-2010</td>\n",
       "      <td>8541</td>\n",
       "      <td>16246</td>\n",
       "      <td>16384</td>\n",
       "      <td>SCHOOL</td>\n",
       "      <td>11663</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81869.0</td>\n",
       "      <td>CANVASS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>High</td>\n",
       "      <td>locid7292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46793577796409</td>\n",
       "      <td>25-07-2010</td>\n",
       "      <td>35194</td>\n",
       "      <td>20114</td>\n",
       "      <td>19225</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>16286</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81874.0</td>\n",
       "      <td>CANVASS RE-INSPECTION</td>\n",
       "      <td>32.0</td>\n",
       "      <td>High</td>\n",
       "      <td>locid2048</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               ID        Date  LicenseNo  FacilityID  FacilityName  \\\n",
       "0  47571900570810  19-03-2014      34218        2354          2185   \n",
       "1  19685766253655  22-11-2010      24597       13183         12516   \n",
       "2  37885701847801  09-06-2014       1779       26434         25220   \n",
       "3  39946284936679  07-04-2010       8541       16246         16384   \n",
       "4  46793577796409  25-07-2010      35194       20114         19225   \n",
       "\n",
       "            Type  Street         City       State  LocationID  \\\n",
       "0  GROCERY STORE   17352  id-11235901  id_1890134     81864.0   \n",
       "1         SCHOOL   15536  id-11235901  id_1890134     81871.0   \n",
       "2     RESTAURANT    8849  id-11235901  id_1890134     81902.0   \n",
       "3         SCHOOL   11663  id-11235901  id_1890134     81869.0   \n",
       "4     RESTAURANT   16286  id-11235901  id_1890134     81874.0   \n",
       "\n",
       "                  Reason  SectionViolations RiskLevel     Geo_Loc  \n",
       "0                CANVASS               19.0       Low   locid1145  \n",
       "1                CANVASS               32.0      High  locid16450  \n",
       "2                CANVASS                NaN      High  locid12916  \n",
       "3                CANVASS                NaN      High   locid7292  \n",
       "4  CANVASS RE-INSPECTION               32.0      High   locid2048  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test = pd.read_excel('data/Data_Test.xlsx')\n",
    "test[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((147443, 15), (49148, 14))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape, test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4    79453\n",
       "1    28456\n",
       "5    20631\n",
       "6    12691\n",
       "3     4747\n",
       "2     1420\n",
       "0       45\n",
       "Name: Inspection_Results, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.Inspection_Results.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4    0.538873\n",
       "1    0.192997\n",
       "5    0.139925\n",
       "6    0.086074\n",
       "3    0.032195\n",
       "2    0.009631\n",
       "0    0.000305\n",
       "Name: Inspection_Results, dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.Inspection_Results.value_counts(True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train.plot(x='SectionViolations',y='Inspection_Results',kind='scatter',figsize=(18,8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x11fa98790>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABC4AAAIMCAYAAAA6m+2hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdf3xb9Xn3//dlO9gsppCENimQkHV0rbCh/Xa52814w2oaWBgNrDes891thdszdRne1gA1oLZbv5taMpLQ1lnj1lNLSjdB4S7lZxpSI3PXybdsadcWZ+pgI4F0NOmahEJcbGL7+v6hI9dKHMU/lBxJfj0fDz+cz0dHOteRPvGRLn3O9TF3FwAAAAAAQDGqCDsAAAAAAACAYyFxAQAAAAAAihaJCwAAAAAAULRIXAAAAAAAgKJF4gIAAAAAABQtEhcAAAAAAKBokbgAAOA4zOwuM/vbsOMIW77nwcyuMbO+kx1TMTCzQ2b2prDjKCQzW2pmbmZVYccCAACJCwBAyTCz3Wb2avBB8aCZPWpmi8OOa7zgw955YcdRysys18z+NOw4JjJRbO5e6+7PneB9Dgbj/mdm9nUze+OJ2l+eGIryNQEAlD8SFwCAUvNed6+V9EZJ+yR1hhzPCWMZnKshSTcE4/48SbWS1oYcDwAAJw1vhgAAJcndByXdL+n8bJ+ZnW5mXzGz/zaz583sY9kP/ma20czuH7ftGjPrCZIDTWb2YzO7LfhGe7eZfeBY+zazVjP7DzM7YGYPmdlZQf//DTb5QfDt+PsnuG+lma0L9rPLzG4YPyU/+GY7bmbbJP1C0pvM7KxgPweC/baOe7ycyzeyxzKuvdvMbjWzfwtmqXzZzGrG3X65mX3fzF4ys+1mduG42/4fM/uemb1iZvdKGrvfsZ8a6zSzn5vZj8xsedB5tZl994gNbzSzbxzn8caOJ9j+p2b2EzO7dtztlwXH9oqZ/ZeZ3XTE/SZ8Tc2s2szWmtkLZrbPzLrM7NRxt18RPC8vm9l/mtnvmllc0m9L2hC8vhuCbcdm2RxnDF5jZn3Bfg8Gr//K4z0H47n7S5K+Ient42KtMLNbgjj3m9nXzGx+cFuNmX016H/JzP7FzBYGt+02s/eMe5y/NrOvTvAaHHXcwf+bO4PX5Odm9kMzq5/KsQAAMFkkLgAAJcnMfkXS+yV9Z1x3p6TTJb1J0sWS/kRS9kPujZIuDD48/rakFkkfdHcPbl8k6UxJZ0v6oKQvmtlbJtjvuyV9WtIfKDPr43lJ90iSu/9OsNnbgssH7p0g9FZJK5X54PkOSVdOsM0fS7pO0mnB4ycl/VjSWZKukvSpbFJgkj4g6VJJvybp1yV9LDiWd0j6kqQPSVog6QuSHgo+1J+izAfkuyXNl3SfpP95nP28S9JzyjyPfyXp68EH6Ick/aqZRcZt+0fBY0/GImVe17OVed3+3szmBbclJH3I3U+TVC/piSPud6zXdE3wXLxdmVkMZ0v6hCSZ2TslfUXSzZLOkPQ7kna7e0zStxXMfnD3GyaINd8YzD5H/x7E9XeSEmZmk3weZGYLJL1P0n+M6/5zZcbRxcqMkYOS/j647YNBPIuVeY3bJL062f1J0jGO+xJlnpdfV+Y5er+k/VN5XAAAJovEBQCg1HzDzF6S9LKkFZLukDIzGZT58HSru7/i7rslrVMmCSB3/4UyH5bXS/qqpHZ3//ERj/1xdx9y9yclPapMcuJIH5D0JXf/nrsPSbpV0m+Z2dJJxv8Hkj7r7j9294OSbp9gm7vcfae7Dyvz4btRUoe7D7r79yX9Q/a4JmmDu+9x9wOS4pKag/5WSV9w96fcfcTdN0kakvSbwc8cSZ9x98Pufr+kfznOfn46bvt7lfmA/nvB83SvMs+/zKxO0lJJj0wy/sOS/t/gcR+TdEjSW8bddr6Zvc7dD7r7946471GvaZAoaJX0EXc/4O6vSPqU
pD8M7tOizGu81d1H3f2/3P1HxwvyeGMw8Ly7d7v7iKRNyiS/Fk7iOficmf1c0s+USXq0j7vtQ5JiwZgakvTXkq6yzCyew8okLM4LXuPvuvvLk9jf8RxWJrH2Vknm7ml3/0kBHhcAgKOQuAAAlJor3f0MSdWSbpD0pJllv1k/RZkZClnPK/NNuiTJ3f9ZmRkBJulrRzzuQXcfOOK+Z02w/7PG78PdDynzTfPZE2w7kbMk7RnX3jPBNuP7zpKU/XA9PrbJ7u/Ixxt/XOdKujG4hOClICG0OLj9LEn/NW5GSva++Uy0fXZfmyT9ryBp8MeSvhZ8yJ6M/UESJ+sXytR5kDKzQC6T9LyZPWlmvzVuu2O9pq+X9CuSvjvuuL8Z9EuZ5+A/JxnbeMcdg5L2Zv8RJNM07ljy+XN3P13ShZLmSTpn3G3nSnpg3LGkJY0okxC5W9IWSfeY2Ytm9ndmNmdqh3U0d39C0gZlZnbsM7MvmtnrZvq4AABMhMQFAKAkBd8ef12ZD2iNynwTfViZD3FZSyT9V7ZhZn+mTMLjRUkfPeIh55nZ3CPu++IEu35x/D6C+ywYv5/j+IlyP3ROtCrK+A//L0qab2anHRFbdn8DynwIz1o0weON38f449ojKe7uZ4z7+RV3TwZxnn3EZQxLjnVQgYm2f1GS3P07kl5TplbC/9LkLxPJy93/xd2vkPQGZS5tGZ+QOtZr+jNlLpeoG3fcpwfFL6XM8/Jrx9plnnCOOwZnyt2flvS3ylwuk32u90haecTrWBPMFDns7p909/MlNUi6XJnLV6TJjZ2xXU8Qy+fc/Tck1SlzycjNMzs6AAAmRuICAFCSguKAVyjz7XM6mHr/NUlxMzvNzM6VtFqZy0JkZr+uzAe+P1LmG/+Pmtnbj3jYT5rZKUENjMuVqetwpH+SdK2Zvd3MqpW5xOCp4LIAKbPSyZvyhP41SX9hZmeb2RmSOvIdp7vvkbRd0qeDQosXKnMpwz8Gm3xf0mVmNj+YefKXEzzMn5nZOUG9iduUuWxDkroltZnZu4Lnc66Z/V6QJPn/JA1L+nMzqzKz90l6Z75YlUke/LmZzTGzqyVFJD027vavKPMt/bC79x3nsY4reK0+YGanu/thZS4fGjlis6NeU3cfDY79TjN7Q/BYZ5vZpcF9Esq8xsstU/jybDN7a3DbMV/f443BAtqkzHO9Kmh3Bfs8NziW1wf/N2RmUTO7ILiM5WVlEivZ5+j7kv4weL2WKVM/5VhyjtvM/kcwbuYokwAZ1NHPPQAABUHiAgBQah42s0PKfAiLK1Ngc2dwW7syH6Kek9SnTJLhS8G1/l+VtMbdf+DuzyrzAf7uIPkgZabwH1TmG/l/lNQ2UV0Dd++R9HFJ/0eZWQm/pl/WRpAy9QU2BdP2J6qR0S3pcUk/lPSvynywH1b+D33NytSEeFHSA5L+yt23BrfdLekHknYHjztRQdB/Cm57Lvj52+BYdihT62FDcOz/Iema4LbXlCkCeU1w2/slfT1PjJL0lKQ3KzPzIC7pKncfX7DxbmUKaBZktkXgjyXtNrOXlSk8+Ufjbsv3mnYoc7zfCe77LQV1M4JLiq6VdKekn0t6Ur+cRfFZZepHHDSzz00Qz4RjsDCHmhG8Np9TZhxmY3pI0uNm9ooyBWvfFdy2SJnVd15W5hKSJ/XLRMrHlRm/ByV9Moj1WI487tcpM5YPKnM5zH6xRCsA4ASx3EtRAQCYfcysSdJX3f2c4217Ava9UlKXu5973I2n9/i7Jf2pu3/rRDz+FGM5VZkCnu8Ikkcncl9NCuk1BQAAhcWMCwAATiIzO9XMLgsuvzhbmWVDHwg7rpPkw5L+5UQnLQAAQHmpCjsAAABmGVNmWv69yhSIfFTSJ0KN6CQIZn6YpCtDDqUoBZc/TWSlu3/7pAYDAECR4VIRAAAAAABQtLhUBAAAAAAAFC0SFwAAAAAAoGiRuAAAAAAAAEWLxAUAAAAAAChaJC4AAAAAAEDR
InEBAAAAAACKFokLAAAAAABQtEhcAAAAAACAokXiAgAAAAAAFC0SFwAAAAAAoGiRuAAAAAAAAEWLxAUAAAAAAChaJC4AAAAAAEDRInEBAAAAAACKFokLAAAAAABQtEhcAAAAAACAokXiAgAAHMXMNpvZB0/Cfn7bzP59ktveZWZ/O4N9HTKzN033/gAAIBwkLgAAKDFm1mhm283s52Z2wMy2mdn/mMHj/bWZfXV8n7uvdPdNM4yz2cx2m5kd0V9lZj81s8vd/dvu/paZ7OcY++41sz8d3+fute7+XKH3BQAATiwSFwAAlBAze52kRyR1Spov6WxJn5Q0FGZcx/CApDMkXXxE/+9KcknfPOkRAQCAkkPiAgCA0vLrkuTuSXcfcfdX3f1xd/+hJJnZ/zaztJkdNLMtZnZu9o5mVmdmW4NZGvvM7DYz+11Jt0l6f3ApxQ+CbcdmLJhZhZl9zMyeD2ZKfMXMTg9uW2pmbmYfNLMXzOxnZhYLYhyU9DVJf3LEMfyJpH9092EzazKzH4+LMRLs+yUz22lmqyZ6Esxsnpk9Ymb/HRzrI2Z2TnBbXNJvS9oQHNOGoN/N7Lzg36cHx/HfwXF9zMwqgtuuMbM+M1sbPPYuM1s5bt/XmNlzZvZKcNsHpvdSAgCAySBxAQBAaXlG0oiZbTKzlWY2L3uDmV2pTBLifZJeL+nbkpLBbadJ+pYysxzOknSepB53/6akT0m6N7iU4m0T7POa4Ccq6U2SaiVtOGKbRklvkbRc0ifMLBL0b5J0lZmdGsRxuqT3SvrKkTsxszmSHpb0uKQ3SGqX9I9mNtGlJBWSvizpXElLJL2ajcndY8Gx3xAc0w0T3L9T0unB8VysTDLl2nG3v0vSv0s6U9LfSUpYxlxJn5O00t1Pk9Qg6fsTPD4AACgQEhcAAJQQd39ZmSSBS+qW9N9m9pCZLZT0IUmfdve0uw8rk5B4ezDr4nJJe919nbsPuvsr7v7UJHf7AUnr3f05dz8k6VZJf2hmVeO2+WQw++MHkn4g6W1BvNsk7ZP0+8F2fyDpGXef6MP+byqTFLnd3V9z9yeUuSymeYLnYb+7/x93/4W7vyIprqMvSZmQmVVKer+kW4PnYbekdZL+eNxmz7t7t7uPKJN8eaOkhcFto5LqzexUd/+Ju++czH4BAMD0kLgAAKDEBImJa9z9HEn1ysyg+Iwysw8+G1xm8ZKkA5JMmToYiyX95zR3eZak58e1n5dUpV9+kJekveP+/QtlEhBZX9EvLxf5Y2USAcfazx53Hz1iX2cfuaGZ/YqZfSG4zONlSf9X0hlBUuJ4zpR0io4+pvH7GTsed/9F8M9adx9QJunRJuknZvaomb11EvsEAADTROICAIAS5u4/knSXMgmMPZI+5O5njPs51d23B7f92rEe5ji7eVGZpEjWEknDysykmIyvSFpuZr+lzKyKf8qzn8XZWhPj9vVfE2x7ozKXprzL3V8n6XeC/uwKJvmO6WeSDuvoY5poP0dx9y3uvkKZWRg/UmbmCwAAOEFIXAAAUELM7K1mduO4QpSLlbmU4juSuiTdamZ1wW2nm9nVwV0fkbTIzP7SzKrN7DQze1dw2z5JS49IGIyXlPQRM/tVM6vVL2tiDE8mZnd/XlJf8Dhb3X3vMTZ9StKApI+a2Rwza1KmHsY9E2x7mjJ1LV4ys/mS/uqI2/cpU79ionhGlCkaGg+eh3MlrZb01Ym2H8/MFprZqqDWxZCkQ5JGjnc/AAAwfSQuAAAoLa8oUzjyKTMbUCZh0S/pRnd/QNIaSfcEl0/0S1opSUEdiBXKJAL2SnpWmWKbknRf8Hu/mX1vgn1+SdLdylyOsUvSoDKFM6dikzIzHI4qypnl7q9JWhXE/DNJn5f0J8GskiN9RtKpwXbf0dFLq35WmaKgB83scxPcv12ZJMlzyiRV/kmZ4zyeCmVme7yozKU4F0u6fhL3AwAA02Tux5sdCgAAAAAAEA5mXAAAAAAAgKJF4gIA
AAAAABQtEhcAAAAAAKBokbgAAAAAAABFi8QFAAAAAAAoWlUnc2dnnnmmL1269GTusiQNDAxo7ty5YYeBMsF4QiExnlBIjCcUEuMJhcR4QqExpibnu9/97s/c/fVH9p/UxMXSpUu1Y8eOk7nLktTb26umpqaww0CZYDyhkBhPKCTGEwqJ8YRCYjyh0BhTk2Nmz0/Uz6UiAAAAAACgaJG4AAAAAAAARYvEBQAAAAAAKFokLgAAAAAAQNEicQEAAAAAAIoWiQsAAAAAAFC0SFwAAAAAAICiReICAAAAAAAULRIXAAAAAACgaJG4AAAAAAAARYvEBQAAAAAAKFokLgAAAAAAQNEicQEAAAAAAIoWiQsAAAAAAFC0jpu4MLO3mNn3x/28bGZ/aWbzzWyrmT0b/J53MgIGAAAAAACzx3ETF+7+7+7+dnd/u6TfkPQLSQ9IukVSj7u/WVJP0AYAAACAknTppZeqoqJC0WhUFRUVuvTSS8MOCYCmfqnIckn/6e7PS7pC0qagf5OkKwsZGAAAAACcLJdeeqkef/xxtbW16eGHH1ZbW5sef/xxkhdAEaia4vZ/KCkZ/Huhu/9Ektz9J2b2hoJGBgAAAAAnydatW/XhD39Yn//859Xb26vPf/7zkqSurq6QIwNg7j65Dc1OkfSipDp332dmL7n7GeNuP+juR9W5MLPrJF0nSQsXLvyNe+65pzCRl7FDhw6ptrY27DBQJhhPKCTGEwqJ8YRCYjxhpqLRqB5++GHV1taOjadDhw7pve99r1KpVNjhocTxN2pyotHod9192ZH9U5lxsVLS99x9X9DeZ2ZvDGZbvFHSTye6k7t/UdIXJWnZsmXe1NQ0tchnod7eXvE8oVAYTygkxhMKifGEQmI8YabMTI899tjYjIumpiZdf/31MjPGFmaMv1EzM5XERbN+eZmIJD0k6YOSbg9+P1jAuAAAAADgpFmxYoU2btwoSbrssst0/fXXa+PGjbrkkktCjgzApBIXZvYrklZI+tC47tslfc3MWiS9IOnqwocHAAAAACfeli1bdOmll6qrq0sbN26UmemSSy7Rli1bwg4NmPUmlbhw919IWnBE335lVhkBAAAAgJKXTVIwrR8oLlNdDhUAAAAAAOCkIXEBAAAAAACKFokLAAAAAABQtEhcAAAAAACAokXiAgAAAAAAFC0SFwAAAAAAoGiRuAAAAAAAAEWLxAUAAAAAAChaJC4AAAAAAEDRInEBAAAAAACKFokLAAAAAABQtEhcAAAAAACAokXiAgAAAAAAFC0SFwAAAAAAoGiRuAAAAAAAAEWLxAUAAAAAAChaJC4AAAAAAEDRInEBAAAAAACKFokLAAAAAABQtEhcAAAAAACAokXiAgAAAAAAFC0SFwAAAAAAoGiRuAAAAAAAAEWLxAUAAAAAAChaJC4AAAAAAEDRInEBAAAAAACKFokLAAAAAABQtEhcAAAAAACAokXiAgAAAAAAFC0SFwAAAAAAoGiRuAAAAAAASclkUvX19Vq+fLnq6+uVTCbDDgmApKqwAwAAAACAsCWTScViMSUSCY2MjKiyslItLS2SpObm5pCjA2Y3ZlwAAAAAmPXi8bgSiYSi0aiqqqoUjUaVSCQUj8fDDg2Y9UhcAAAAAJj10um0Ghsbc/oaGxuVTqdDighAFokLAAAAALNeJBJRX19fTl9fX58ikUhIEQHIInEBAAAAYNaLxWJqaWlRKpXS8PCwUqmUWlpaFIvFwg4NmPUozgkAAABg1ssW4Gxvb1c6nVYkElE8HqcwJ1AESFwAAAAAgDLJi+bmZvX29qqpqSnscAAEuFQEAAAAAAAULRIXAAAAAACgaJG4AAAAAAAARYvEBQAAAAAAKFokLgAAAAAAQNEicQEAAAAAAIoWiQsAAAAAAFC0SFwAAAAAAICiNanEhZmdYWb3m9mPzCxtZr9lZvPNbKuZPRv8nneigwUAAAAAALPLZGdcfFbSN939rZLeJikt6RZJPe7+
Zkk9QRtAkViwYIHMTNFoVGamBQsWhB0SAABAUVuyZEnO+6clS5aEHRIATSJxYWavk/Q7khKS5O6vuftLkq6QtCnYbJOkK09UkACmZsGCBTpw4IDq6uqUTCZVV1enAwcOkLwAAAA4hiVLlmjPnj1qaGjQfffdp4aGBu3Zs4fkBVAEJjPj4k2S/lvSl83sX83sH8xsrqSF7v4TSQp+v+EExglgCrJJi/7+fi1atEj9/f1jyQsAAAAcLZu02LZtm84880xt27ZtLHkBIFzm7vk3MFsm6TuSLnL3p8zss5JeltTu7meM2+6gux9V58LMrpN0nSQtXLjwN+65555Cxl+WDh06pNra2rDDQAmLRqNKJpNatGjR2Hjau3evmpublUqlwg4PJYy/TygkxhMKifGEmYpGo7rvvvt05plnjo2nn/3sZ7r66qt5/4QZ42/U5ESj0e+6+7Ij+yeTuFgk6TvuvjRo/7Yy9SzOk9Tk7j8xszdK6nX3t+R7rGXLlvmOHTumeQizR29vr5qamsIOAyXMzMZmXGTHU319vXbu3Knj/Z8H8uHvEwqJ8YRCYjxhpsxsbMZFdjxddNFF2r59O++fMGP8jZocM5swcVF1vDu6+14z22Nmb3H3f5e0XNK/BT8flHR78PvBAscMYJrmz5+vnTt3ysyO6gcAoJwkk0nF43Gl02lFIhHFYjE1NzeHHRZK0OLFi7V9+/aj3j8tXrw4pIgAZB03cRFol/SPZnaKpOckXatMfYyvmVmLpBckXX1iQgQwVS+99NKU+gEAKEXJZFKxWEyJREIjIyOqrKxUS0uLJJG8wJRdccUV2rBhw4T9AMI1qeVQ3f377r7M3S909yvd/aC773f35e7+5uA3Vf+AIjE6Oqra2lq5u1KplNxdtbW1Gh0dDTs0AAAKJh6PK5FIKBqNqqqqStFoVIlEQvF4POzQUIK6u7u1bt26nPdP69atU3d3d9ihAbPepBIXAErPk08+mbcNAECpS6fTamxszOlrbGxUOp0OKSKUsqGhIbW1teX0tbW1aWhoKKSIAGSRuADK1MUXX5y3DQBAqYtEIurr68vp6+vrUyQSCSkilLLq6mp1dXXl9HV1dam6ujqkiABkkbgAylBFRYUOHTqk0047TT/60Y902mmn6dChQ6qo4L88AKB8xGIxtbS0KJVKaXh4WKlUSi0tLYrFYmGHhhLU2tqqjo4OrV+/XoODg1q/fr06OjrU2toadmjArHfc5VALieVQJ4elclAIlZWVOTUtKioqNDIyEmJEKAf8fUIhMZ5QCEeuACGJpSsxbTU1NTmXhlRXV2twcDDEiFAuOOdNzrGWQ+XrV6BMjYyM5BSXImkBACg345MWb3vb2ybsByZrwYIFGhoaUl1dnZLJpOrq6jQ0NKQFCxaEHRow65G4AAAAQElzd33mM59hpgVm5MCBA6qrq1N/f78WLVqk/v5+1dXV6cABFk8EwkbiAgAAACXr0ksvzdsGpuKxxx7L2wYQDhIXAAAAKFlbtmzJ2wam4rLLLsvbBhCOqrADAAAAAGaCmhYohPnz52vnzp2qrq7Wa6+9plNOOUWvvfaa5s+fH3ZowKzHjAsAAAAAs96GDRtkZnrttdckSa+99prMTBs2bAg5MgAkLgAAAFDSxq+iBUxXPB5XT09Pznjq6elRPB4POzRg1iNxAQAAgJJ122235W0Dk5VOp9XY2JjT19jYqHQ6HVJEALJIXAAAAKBkfepTn8rbBiYrEomor68vp6+vr0+RSCSkiABkkbgAAABASTMzfeELX6BIJ2YkFouppaVFqVRKw8PDSqVSamlpUSwWCzs0YNZjVREAAACUJHcfS1bcc889Of3AVDU3N0uS2tvblU6nFYlEFI/Hx/oBhIcZFwAAAChJ42dYvOMd75iwH5iK5uZm9ff3q6enR/39/SQtgCJB4gIAAAAlzd21bt06ZloAQJkicQEAAICS9d73vjdvGwBQ+khcAAAAoGQ9/PDDedsAgNJH
cU4AAACUNDPTO97xDn3ve98LOxQAwAnAjAugTNXW1srMFI1GZWaqra0NOyQAkCQlk0nV19dr+fLlqq+vVzKZDDsklKjxNS3GJy2odYHpWrJkSc77pyVLloQdEkoc57zCYMYFUIZqa2s1MDCgpUuX6m/+5m/08Y9/XLt371Ztba0OHToUdngAZrFkMqlYLKZEIqGRkRFVVlaqpaVFkqjejyk71uohZkbyAlO2ZMkS7dmzRw0NDfrIRz6iO++8U9u3b9eSJUv0wgsvhB0eShDnvMJhxgVQhrJJi127dumcc87Rrl27tHTpUg0MDIQdGoBZLh6PK5FIKBqNqqqqStFoVIlEQvF4POzQUMLcXalUimQFZiSbtNi2bZvOPPNMbdu2TQ0NDdqzZ0/YoaFEcc4rHBIXQJn61re+lbcNAGFIp9NqbGzM6WtsbFQ6nQ4pIgD4pfvvvz9vG5gKznmFQ+ICKFPvec978rYBIAyRSER9fX05fX19fYpEIiFFBAC/dNVVV+VtA1PBOa9wqHEBlKG5c+dq9+7dR137O3fu3JAiAoCMWCymlpaWset9U6mUWlpamDaLGTlWrQtgKhYvXqzt27eroqJC7j5WK2Xx4sVhh4YSxTmvcEhcAGVoeHh4Sv0AcLJki5G1t7crnU4rEokoHo9TpAzTcqwinCQyMB1XXHGFNmzYMDamsr+vuOKKMMNCCeOcVzhcKgKUoaGhIS1cuDCnWNnChQs1NDQUdmgAoObmZvX396unp0f9/f28gcO0ubtqampyznfZNjBV3d3dWrduXc54Wrdunbq7u8MODSWMc15hkLgAylRvb2/eNgAA5YDzHQplaGhIbW1tOX1tbW188QMUARIXQJlqamrK2wYAoBxwvkOhVFdXq6urK6evq6tL1dXVIUUEIIvEBVCGqqurtW/fPi1atEi7d+/WokWLtG/fPk68AICyYmYaHBzUqaeeqp07d+rUU0/V4OAgNS4wLa2trero6ND69es1ODio9evXq6OjQ62trWGHBsx6FOcEylD2Tdu+fft07bXX5vQDAFAuRkdHx5IXN9xwQ04/MFWdnZ165plndNNNN42tKrJixQp1dnaGHRow6zHjAihDS5YskSQ1NDTovvvuU0NDQ04/APCvwfgAACAASURBVADlYPzMivHnOGZcYDqSyaSeffZZ9fT0aOvWrerp6dGzzz6rZDIZdmjArEfiAihDe/bsUUNDg7Zt26YzzzxT27ZtU0NDg/bs2RN2aAAAFJy7a9OmTawmghmJx+NKJBKKRqOqqqpSNBpVIpFQPB4POzRg1iNxAZSp+++/P28bAIByUF9fn7cNTFY6nVZjY2NOX2Njo9LpdEgRAcgicQGUqauuuipvGwCActDf35+3DUxWJBJRX19fTl9fX58ikUhIEQHIojgnUIYWL16s7du366KLLtJHPvIRXXTRRdq+fbsWL14cdmgAABQcNS1QCLFYTFdccYUGBwd1+PBhzZkzRzU1NfrCF74QdmjArMeMC6AMvfDCC2PJi6uvvnosafHCCy+EHRoAAEBR2r59uwYGBjR//nyZmebPn6+BgQFt37497NCAWY/EBVCmXnjhBbm7UqmU3J2kBQCgLFVUVOSc7yoqeHuL6enu7tYdd9yhvXv36oknntDevXt1xx13qLu7O+zQgFmPv+wAAAAoWY8//njeNjBZQ0NDamtry+lra2vT0NBQSBEByCJxAQAAgJJ1ySWX5G0Dk1VdXa2urq6cvq6uLlVXV4cUEYAsEhcAAAAoWaOjo6qsrNSOHTtUWVmp0dHRsENCiWptbVVHR4fWr1+vwcFBrV+/Xh0dHWptbQ07NGDWY1URoExNVGHd3UOIBACAE8PdZWYaHR3VzTffnNMPTFVnZ6e6u7t14403jvVVV1ers7MzxKgASMy4AMrS+KRFPB6fsB8AgFJ3rPMa5ztMx4IFCzQ0NKS6ujolk0nV1dVpaGhICxYsCDs0YNYjcQGUMXdXQ0MD3zwB
AMra+FVFgOk6cOCA6urq1N/fr0WLFqm/v191dXU6cOBA2KEBsx6JC6BMPfLII3nbAAAAyPXYY4/lbQMIB4kLoExdfvnledsAAADIddlll+VtAwjHpIpzmtluSa9IGpE07O7LzGy+pHslLZW0W9IfuPvBExMmgOngGl8AwGzA+Q6FMH/+fO3cuVP19fX62Mc+pvr6eu3cuVPz588POzRg1pvKjIuou7/d3ZcF7Vsk9bj7myX1BG0AAADgpDhWTQtqXWA69u/fP5a8aG5uHkta7N+/P+zQgFlvJpeKXCFpU/DvTZKunHk4AAqloqIip1hZRQVXhgEAyo+755zvSFpgJvbv358znkhaAMXBJvPH3cx2STooySV9wd2/aGYvufsZ47Y56O7zJrjvdZKuk6SFCxf+xj333FOw4MvVoUOHVFtbG3YYKGHRaFR33HGHli1bNjaeduzYoZtvvlmpVCrs8FDC+PuEQmI8oZAYTygkxhMKjTE1OdFo9LvjrvIYM9nExVnu/qKZvUHSVkntkh6aTOJivGXLlvmOHTumHv0s09vbq6amprDDQAkzM1VUVGhkZGRsPFVWVmp0dJRvojAj/H1CITGeUEiMJxQS4wmFxpiaHDObMHExqbnj7v5i8Punkh6Q9E5J+8zsjcGDv1HSTwsXLoCZGh0dVWVlpXbs2DGWtAAAAACAUnPcxIWZzTWz07L/lnSJpH5JD0n6YLDZByU9eKKCBDA12VkVo6Ojuvnmm8eSFsy2wHQlk0nV19dr+fLlqq+vVzKZDDskAJCUmWVoZopGo2P/BqZrzpw5OeNpzpw5YYcEQJObcbFQUp+Z/UDSP0t61N2/Kel2SSvM7FlJK4I2gCJQWVkpSaqtrdXGjRvHrqfL9gNTkUwmFYvF1NnZqS1btqizs1OxWIzkBYDQHStJQfIC0zFnzhwNDw9r3rx56u7u1rx58zQ8PEzyAigCx01cuPtz7v624KfO3eNB/353X+7ubw5+Hzjx4QKYjNHRUdXW1uqVV17RW9/6Vr3yyiuqra3lchFMSzweVyKRUDQaVVVVlaLRqBKJhOLxeNihAYAk5awCAUxXNmlx4MABnXfeeTpw4MBY8gJAuFgfEShTTz75ZN42MFnpdFqNjY05fY2NjUqn0yFFBADAicH7J6A4kbgAytTFF1+ctw1MViQSUV9fX05fX1+fIpFISBEBAHBi8P4JKE5VYQcAoPAqKip06NCho67xraggV4mpi8ViamlpUSKR0MjIiFKplFpaWrhUBEDRoKYFCqGqqkoHDx48ajxVVfGRCQgb/wuBMnSsWhbUuMB0NDc3S5La29uVTqcViUQUj8fH+gEAKAfHqmVBjQsgfHz9CpQxipWhUJqbm9Xf36+enh719/eTtABQVDjfoZAYT0DxIXEBlKlHHnkkbxsAgHLwjW98I28bmIo1a9bkbQMIB4kLoExdfvnledsAAJSDK6+8Mm8bmIqOjo68bQDhIHEBlDEz0/bt2ylaBgAoa2amb3/725zvUBBmprvvvpvxBBQREhdAGRp/TWYsFpuwHwCAUjf+vPaJT3xiwn5gssaPmy996UsT9gMIB4kLoAyN/4Zg/JKVfHMAACgnxzqvcb7DdGTHTUVFhe64446xZeQZT0D4SFwAZczd1dDQwDcFAICyxioQKJSKigqNjIxo2bJlGhkZGUteAAgX/xOBMsWqIgAAAFPz+OOP520DCAeJC6BMsaoIAADA1FxyySV52wDCQeICKGOsKgIAmA3MTNFolPMdZmx0dFSVlZXasWOHKisrNTo6GnZIAETiAihLrCoCAJgNjnVe43yH6ciOm9HRUd18881jSQvGExA+EhdAmXL3nGJlnHQBAOWI8x0KifEEFCcSFwAAAAAAoGiRuAAAAAAAAEWLxAUAAAAAAChaJC4AAMBJ1d7erpqaGkWjUdXU1Ki9vT3skFDCzCxnVRFWFsFMJJNJ1dfXa/ny5aqvr1cymQw7JACSqsIOAAAAzB7t
7e3q6urSmjVrdP755+vf/u3f1NHRIUnq7OwMOTqUmmMlKcyMooqYsmQyqVgspkQioZGREVVWVqqlpUWS1NzcHHJ0wOzGjAsAAHDSdHd3a82aNVq9erVqamq0evVqrVmzRt3d3WGHhhI2fhUIYLri8bgSiYSi0aiqqqoUjUaVSCQUj8fDDg2Y9UhcAACAk2ZoaEhtbW05fW1tbRoaGgopIgDISKfTamxszOlrbGxUOp0OKSIAWSQuAADASVNdXa2urq6cvq6uLlVXV4cUEQBkRCIR9fX15fT19fUpEomEFBGALGpcAGUqmUwqHo8rnU4rEokoFotxfSaA0LW2to7VtDj//PO1fv16dXR0HDULA5gKCnKiEGKxmN7//vdr7ty5euGFF7RkyRINDAzos5/9bNihAbMeiQugDFFcCkCxyhbgvO222zQ0NKTq6mq1tbVRmBNAUaFeClBcuFQEKEMUlwJQzDo7OzU4OKhUKqXBwUGSFpgxinOiEOLxuO69917t2rVLTzzxhHbt2qV7772X909AESBxAZQhiksBAGaLP/uzP8vbBiaL909A8SJxAZQhiksBAGaLv//7v8/bBiaL909A8SJxAZShWCymlpYWpVIpDQ8PK5VKqaWlRbFYLOzQAAAoODPTnXfeSZFOzAjvn4DiRXFOoAw1Nzdr+/btWrly5Vjxu9bWVgpzAgDKiruPJSseeuihnH5gqpqbm3XXXXdp+fLlY2NrxYoVvH8CigAzLoAylEwm9eijj2rz5s3aunWrNm/erEcffVTJZDLs0AAAKJhjzbBg5gWmo729XU888YTWrl2rzZs3a+3atXriiSfU3t4edmjArEfiAihDrCoCAJhNWFUEhdDd3a01a9Zo9erVqqmp0erVq7VmzRp1d3eHHRow65G4AMoQVbEBALPF6173urxtYLKGhobU1taW09fW1qahoaGQIgKQReICKENUxQYAzBYvv/xy3jYwWdXV1erq6srp6+rqUnV1dUgRAcgicQGUIapiAyhmyWRS9fX1Wr58uerr66m/gxkzM0WjUWpbYEZaW1t10003jV1mW1VVpZtuukmtra1hhwbMeqwqApShbPXr9vZ2pdNpRSIRxeNxqmIDCF0ymVQsFlMikdDIyIgqKyvV0tIiSfyNAgAAE2LGBVCmmpub1d/fr56eHvX39/OBAEBRoHgwTgSKc6IQuru7tXbt2rHZqsPDw1q7di3FOYEiQOICAACcNBQPRqEdWX+AegSYLopzAsWLxAUAADhpKB6MQjvyQyUfMjFdFOcEihc1LgAAwEmTLR6crXGRLR7MpSKYCYpyohBaW1vV0dEhSTr//PO1fv16dXR0HDULA8DJR+ICKFMTvYnj2l8AYWtubtb27du1cuVKDQ0Nqbq6Wq2trdThwbS4O+c7FExnZ6c2bNigG2+88ah+AOHiUhGgDGXfxJmZbr/99pw2AIQpmUzq0Ucf1ebNm7V161Zt3rxZjz76KEuiYlqOdV7jfIfpqKjIfDSqqanRhg0bVFNTk9MPIDz8LwTKlJlpdHRU73rXuzQ6OsqbOABFgVVFcCKwqggKwd1VU1OjV199VXV1dXr11VdVU1PDuAKKAIkLoExt3rw5bxsAwsCqIgCKWW9vb942gHCQuADK1MqVK/O2ASAMrCoCoJg1NTXlbQMIx6QTF2ZWaWb/amaPBO1fNbOnzOxZM7vXzE45cWECmCp3V0VFhZ566ilVVFQwzRFAUciuKpJKpTQ8PDy2qkgsFgs7NJQwM1M0GuWySMyImWlwcDBnPGXbwHQlk0nV19dr+fLlqq+vp6bTNE1lVZG/kJSW9LqgvUbSne5+j5l1SWqRtLHA8QGYhmyVdXfXLbfcktMPAGHKrh7S3t6udDqtSCSieDzOqiIAQnfKKadoaGhown5gOpLJpGKx2NgS4JWVlWppaZEkzntTNKkZF2Z2jqTfk/QPQdskvVvS/cEmmyRdeSICBDA97p5TrIykBYBi0dzcrP7+
fvX09Ki/v583b5gxinOiEIaGhrRw4cKc8bRw4cIJkxnAZFCQunAmO+PiM5I+Kum0oL1A0kvuPhy0fyzp7InuaGbXSbpOkhYuXEiBm0k4dOgQz9MsF41Gww7hmFKpVNghIET8fUIhMZ5QKL29vUeNJ8YWpuP222/PGU+33367rr32WsYTpiWdTmtkZCRnTI2MjCidTjOmpui4iQszu1zST939u2bWlO2eYNMJU9zu/kVJX5SkZcuWOQVujq+3t5dCQLNcIb8xWnrLo9p9++8V7PEwu/H3CYXEeEKhNDU1HTWeGFuYjltuuUV79+4dG0+LFi2SxHjC9EQiEVVWVub8jUqlUopEIoypKZrMpSIXSVplZrsl3aPMJSKfkXSGmWUTH+dIevGERAgAAADkQXFOFEJ1dbX27dunRYsWaffu3Vq0aJH27dun6urqsENDiaIgdeEcd8aFu98q6VZJCmZc3OTuHzCz+yRdpUwy44OSHjyBcQIAAAA5ssWoJ+oHpiq7gsi+fft07bXX5vQD00FB6sKZ9HKoE+iQtNrM/kOZmheJwoQEAAAAHN/4pMW8efMm7Acmq6amRlKmLt+Xv/xlLVy4MKcfmA4KUhfGlBIX7t7r7pcH/37O3d/p7ue5+9XuTrldAAAAnHTurq9//evMtMCMZFcV2bt3r5YuXaq9e/eyqghQJGYy4wIAAAAI1bnnnpu3DUzFkSs9sPIDUBxIXAAAAKBkPf/883nbwFQcudIDKz8AxYHEBQAAAEqamel973sftS0wI6wqAhQvEhcAgONKJpOqr6/X8uXLVV9fr2QyGXZIKGG1tbU5y1fW1taGHRJK1PiaFgcPHpywH5is7Ooh2VVF9u3bl9MPIDwkLgAAeSWTScViMXV2dmrLli3q7OxULBYjeYFpqa2t1cDAgJYuXaq7775bS5cu1cDAAMkLTMuxZlgw8wLTMX7cfPSjH52wH0A4SFwAAPKKx+NKJBKKRqOqqqpSNBpVIpFQPB4POzSUoGzSYteuXTrnnHO0a9euseQFMF3urlQqxUwLFIS7a+XKlYwnoIiQuAAA5JVOp9XY2JjT19jYqHQ6HVJEKHXf+ta38rYBICx33XVX3jaAcJC4AADkFYlE1NfXl9PX19enSCQSUkQode95z3vytgEgLNdcc03eNoBwVIUdAACguMViMbW0tCiRSGhkZESpVEotLS1cKoJpmTt3rnbv3n3UNeNz584NKSKUA2oQoJAYTyikZDKpeDyudDqtSCSiWCym5ubmsMMqOSQuAAB5ZU+u7e3tYyfdeDzOSRfTcu2112rDhg0T9gNT5e4TfsikNgGAYpAtcJ798qeyslItLS2SxPuoKeJSEQDAcTU3N6u/v189PT3q7+/nZItp6+7u1rp163KKKa5bt07d3d1hh4YS5e4544mkBWaKYq8oFAqcFw6JCwAAcNIMDQ2pra0tp6+trU1DQ0MhRYSwmVlBfqLRaMEei0sFZq/7778/bxuYCgqcFw6JCwAAcNJUV1erq6srp6+rq0vV1dUhRYSwZWdJzPTn3I5HCvZYfNM+e1111VV528BUUOC8cEhcAACAk6a1tVUdHR1av369BgcHtX79enV0dKi1tTXs0ABAUmYWUG9vLzNvMGPZAuepVErDw8NjBc5jsVjYoZUcinMCAICTprOzU1/+8pd14403jvXNnTtXnZ2dIUYFALnFXj/5yU/m9APTQYHzwmHGBQAAOGmWLFmigYEBNTQ06L777lNDQ4MGBga0ZMmSsEMDMMtVVGQ+GtXU1GjDhg2qqanJ6QemgwLnhcH/QgAAcNLs2bNHDQ0N2rZtm84880xt27ZNDQ0N2rNnT9ihAZjl3F01NTV69dVXVVdXp1dffVU1NTXMuACKAIkLAABwUlG1H0Cx6u3tzdsGEA4SFwAA4KSiaj+AYtXU1JS3DSAcJC4AAMeVTCZVX1+v5cuXq76+XslkMuyQUKIWL16s7du3y8wUjUZlZtq+fbsWL14c
dmgAZjkz0+DgoE499VTt3LlTp556qgYHB1ldBDPCe6jCYFURAEBeyWRSsVhMiURCIyMjqqysVEtLiyRRYApTNjAwMKV+ADhZRkdHVVFRocHBQd1www2SMsmM0dHRkCNDqeI9VOEw4wIAkFc8HlcikVA0GlVVVZWi0agSiYTi8XjYoaEEHThwQHV1dXJ3pVIpubvq6up04MCBsEMDAI2Ojub8fSJpgZngPVThMOMCAJBXOp1WY2NjTl9jY6PS6XRIEaHUPfbYY0e1zz333JCiAVBOivWyDlYmmZ14D1U4zLgAAOQViUTU19eX09fX16dIJBJSRCh1l112Wd42AEyXuxfk59yORwr2WCQtZi/eQxUOiQsAQF6xWEwtLS1KpVIaHh5WKpVSS0uLYrFY2KGhBM2fP187d+5UfX299u7dq/r6eu3cuVPz588POzQAAAqK91CFw6UiAIC8mpub1dHRoXe/+91jfYsXL6aoFKZl//79MjPt3LkzZwzt378/xKgAACi85uZm3XXXXVq+fLncXWamFStW8B5qGphxAQDI68ILL9SePXu0atUqPfDAA1q1apX27NmjCy+8MOzQUILmzJkjSZo3b566u7s1b968nH4AAMpFe3u7nnjiCa1du1abN2/W2rVr9cQTT6i9vT3s0EoOiQsAQF5PP/20Vq1apQcffFBnnHGGHnzwQa1atUpPP/102KGhBA0PD2vevHk6cOCAzjvvPB04cEDz5s3T8PBw2KEBAFBQ3d3dWrNmjVavXq2amhqtXr1aa9asUXd3d9ihlRwSFwCA40okEnnbwFQ8+eSTedsAAJSDoaEhtbW15fS1tbVpaGgopIhKF4kLAMBxtbS05G0DU3HxxRfnbQMAUA6qq6vV1dWV09fV1aXq6uqQIipdFOcEAOR1wQUX6KGHHlJFRcVYYSl31wUXXBB2aChBVVVVOnjwoMzsqH4AAMpJa2urbrzxRt144405/TfccENIEZUuZlwAAPLKfhueXYc++5tvyTEdx6plQY0LAEC5eeaZZyRJFRUVOb+z/Zg8EhcAgLy6u7u1bt06ubtSqZTcXevWraOwFGZk/HgCAKAcbd26VR/+8Ic1MjKiVCqlkZERffjDH9bWrVvDDq3kMC8TAJDXsQpLHTntEZisb3zjG0e1r7zyypCiwXS87ZOP6+evHg47jKMsveXRsEPIcfqpc/SDv7ok7DAAhMTd9elPfzqn79Of/rQ2btwYUkSli8QFACCvbGGp1atXj/VRWAozceWVV+bMtCBpUXp+/uph7b7998IOI0dvb6+amprCDiNHsSVSAJxcZqZbb71Vn//858f6br311qPqPOH4uFQEAJBXa2urOjo6tH79eg0ODmr9+vXq6OhQa2tr2KGhhJmZvv3tb/PmDQBQtlasWKGNGzfq+uuv16FDh3T99ddr48aNWrFiRdihlRxmXAAA8urs7NSTTz6Zc2nIBRdcoM7OzhCjQqnKrkwjSZ/4xCdy+gEAKCdbtmxRbW2tNm7cOHZ5yNy5c7Vly5aQIys9zLgAAOTV3t6udDqtdevWafPmzVq3bp3S6bTa29vDDg0laPwMi3e+850T9gMAUA4uvPBCDQwMaNWqVXrggQe0atUqDQwM6MILLww7tJJD4gIAkFd3d7fWrFmj1atXq6amRqtXr9aaNWtYVQQz4u5as2YNMy0AAGXr6aef1qpVq/Tggw/qjDPO0IMPPqhVq1bp6aefDju0ksOlIgCAvFhVBIX2+7//+0e1H3jggZCiwXScFrlFF2y6JewwjrYp7ABynRaRpOIqYgrg5EokEke1X//614cUTekicQEAyItVRVBoRyYpSFqUnlfSt7OqyCSwqgiAlpYWPfjggzltTB2JCwBAXtlVRSTp/PPPH1tV5MhZGMBUUNMCAFDuLrjgAj300EOqrKzU6OioKioqNDo6qgsuuCDs0EoOiQsAQF7Z1UNuu+02DQ0Nqbq6Wm1tbawqAgAAkMfFF1+sp59+
WqOjo5I09vviiy8OM6ySRHFOAMBxdXZ2anBwUKlUSoODgyQtMCNz5syRuyuVSsndNWfOnLBDAgCg4Lq7u7Vu3bqcc966desocD4NJC4AAMBJlUql8rYBACgHxypwPjQ0FFJEpYvEBQAAOKmi0WjeNgAA5SBb4Hw8CpxPz3ETF2ZWY2b/bGY/MLOdZvbJoP9XzewpM3vWzO41s1NOfLgAAKDUHT58WKeccop++MMf6pRTTtHhw4fDDgkAgILLFjhfv369BgcHxwqct7a2hh1ayZlMcc4hSe9290NmNkdSn5ltlrRa0p3ufo+ZdUlqkbTxBMYKAABKnLvLzHT48GH9xV/8RU4/AADlpLOzU88884xuuummsfPfihUrqBU2DcedceEZh4LmnODHJb1b0v1B/yZJV56QCAEAQNkYvwxqPB6fsB8AgHKQTCb17LPPqqenR1u3blVPT4+effZZJZPJsEMrOZOqcWFmlWb2fUk/lbRV0n9Kesndh4NNfizp7BMTIgAAKDfuroaGBmZaAADKVjweVyKRUDQaVVVVlaLRqBKJRE7iHpMzmUtF5O4jkt5uZmdIekBSZKLNJrqvmV0n6TpJWrhwoXp7e6cX6Sxy6NAhnicUFOMJhcLfJxRCPB5Xb2/v2HiKx+OKxWKMrRKz9JZHww7haN8srpjmzuEcXMp47TBT6XRaIyMjOee8kZERpdNpxtcUTSpxkeXuL5lZr6TflHSGmVUFsy7OkfTiMe7zRUlflKRly5Z5U1PTjAKeDXp7e8XzhIL55qOMJxQMf59QCLFYTP7/t3f/wXFd53nHnxcLGGBI1ZQqV6NGVMn6R7QUiNgp67o0EmEJk07sDOgk9qirpnXULV1KIcYpJRuUtmkapzsiIoqtB0rJml7LSmqvZcuRgTElUwhwIRdSE1t2bQLUOlbGVEM5dpWIkkMoBAQs3v7BBYoVqSWxuMTdC34/MxzgHIK7D4gzi4sX577HfX49zZ0qwtqKj+c6ok5wrvV7j+i5fe+POgZWCq6fEIJkMqlEIqGOjo7573lBECiZTLK+FuliThV5U3mnhcxslaT3SCpKCiR9sPxhH5bUf6lCAgCiVSgU1Nraqs7OTrW2tnJvJpbMzJRKpehtAQBYsbLZrG6++WZt2LBBW7du1YYNG3TzzTcrm81GHS12LmbHxbWSHjSzhM4WOr7o7l81s2ckfcHM/rOk/y0pfwlzAgAiUigUlM1mlc/nVSqVlEgklMlkJEnpdDridAAAAPWPQv3SXMypIsfc/R3u3ubure7+ifL8D9z9ne7+Fnf/kLtPXfq4AIDlRmMphK2hoUHuriAI5O5qaLioXuEAAMRKLpfTQw89pBMnTmhoaEgnTpzQQw89xDVUDbhSAABUVSwW1d7eXjHX3t6uYrEYUSLE3eOPP151DADASsA1VHgoXAAAqkomkxodHa2YGx0dVTJ5vgOmgAvbvn171TEAACsB11DhoXABAKgqm80qk8koCALNzMwoCAJlMhkaS6Fms7OzSiQSevrpp5VIJDQ7Oxt1JAAAQsc1VHgWdRwqAODyM9eAs7u7W8ViUclkUrlcjsacqIm7y8w0Ozurj33sYxXzAACsJFxDhYcdFwCAC0qn0xofH9fQ0JDGx8f5hoslcfeK5pwULQAAKxXXUOGgcAEAAAAAAOoWhQsAAAAAAFC3KFwAAAAAAIC6ReECAAAAAADULQoXAAAAAACgblG4AAAAAAAAdYvCBQAAAAAAqFsULgAAAAAAQN2icAEAAAAAAOoWhQsAAAAAAFC3KFwAAAAAAIC6ReECAAAAAADUrcaoAwAAAODyZWbhPVZvaA8ldw/vwQAAS8KOCwAAAETG3UP5EwRBaI9F0QIA6guFCwAAsKwKhYJaW1vV2dmp1tZWFQqFqCMhxhKJhMxMqVRKZqZEIhF1JABAyLhVBAAALJtCoaBsNqt8Pq9SqaREIqFMJiNJSqfTEadD3CQSCc3OzmrNmjW699579bGPfUwT
ExNKJBIqlUpRxwMAhIQdFwAAYNnkcjnl83mlUik1NjYqlUopn88rl8tFHQ0xNFe0OH36tG644Qadh0YbTwAAIABJREFUPn1aa9as0ezsbNTRAAAhonABAACWTbFYVHt7e8Vce3u7isViRIkQd0888UTVMQAg/ihcAACAZZNMJjU6OloxNzo6qmQyGVEixN1NN91UdQwAiD8KFwAAYNlks1llMhkFQaCZmRkFQaBMJqNsNht1NMRQQ0ODJiYmdMUVV+h73/uerrjiCk1MTKihgUtcAFhJeFUHAADLJp1Oa2ZmRlu3btW2bdu0detWzczM0JgTNZlrwDkxMaHbbrtNExMTFfMAEDVO0goHhQsAALBs2tradPLkSXV1demRRx5RV1eXTp48qba2tqijIYbmdla0tLTo/vvvV0tLS8U8AERp7iStvr4+HT16VH19fcpmsxQvasCrOgAAWDZjY2Pq6upSf3+/1q5dq/7+fnV1dWlsbCzqaIghd1dLS4vOnDmjG2+8UWfOnFFLS4vcPepoAMBJWiGicAEAAJZVPp+vOgYWY2RkpOoYAKLCSVrhoXABAACWVSaTqToGFqOjo6PqGACiwkla4aFwAQAAls2mTZs0MDCgHTt26OWXX9aOHTs0MDCgTZs2RR0NMWRmmpyc1KpVq3T8+HGtWrVKk5OTMrOoowEAJ2mFqDHqAAAA4PJx7NgxtbW1aWBgQAMDA5LOFjOOHTsWcTLE0ezs7HzxYvfu3RXzABC1uROzuru7VSwWlUwmlcvlOEmrBuy4AAAAy+rYsWNydwVBIHenaIGaLdxZ8YlPfOK88wAQpXQ6rfHxcQ0NDWl8fJyiRY0oXAAAACDW3F0///M/z2kiALBCUbgAAABAbH3lK1+pOgYAxB+FCwAAAMTWBz7wgapjAED8UbgAAFxQoVBQa2urOjs71draqkKhEHUkxFhbW5vMTKlUSmamtra2qCMh5hauJwDAysOpIgCAqgqFgrLZrPL5vEqlkhKJhDKZjCTRYAqL1tbWprGxMXV1denWW2/VAw88oIGBAbW1tdGkE4u2fft2Pf744+edBwCsHOy4AABUlcvllM/nlUql1NjYqFQqpXw+r1wuF3U0xNBc0aK/v19r165Vf3+/urq6NDY2FnU0xNDg4KBuu+22ilNqbrvtNg0ODkYdDQAQIgoXAICqisWi2tvbK+ba29tVLBYjSoS4y+fzVcfAxXJ33XPPPRVz99xzD6eLAMAKQ+ECAFBVMpnU6Ohoxdzo6KiSyWREiRB3c7cavd4YuFhmprvuuqti7q677qLXBQCsMBQuAABVZbNZZTIZBUGgmZkZBUGgTCajbDYbdTTE0KZNmzQwMKAdO3bo5Zdf1o4dOzQwMKBNmzZFHQ0xtG3bNh08eFC33367JiYmdPvtt+vgwYPatm1b1NEAACGy5dxKt3nzZn/66aeX7fniamRkRB0dHVHHwAqxfu8RPbfv/VHHQMy9973v1eDgoNxdZqZt27bp6NGjUcdCTJ3vt+Fs7UetWE+4FLh+QlgKhYJyuZyKxaKSyaSy2SzNzasws2+5++bXzrPjAgBQVXd3t4aHh7V//3499thj2r9/v4aHh9Xd3R11NMRQU1OTJOnKK6/U4cOHdeWVV1bMA4uxsGhxyy23nHceAKIydzJbX1+fjh49qr6+PmWzWY6VrwGFCwBAVYcPH1Zvb6/27NmjlpYW7dmzR729vTp8+HDU0RBDMzMzuvLKK3Xq1Cm95S1v0alTp3TllVdqZmYm6miIMXfXzp072WkBoK5wMlt4KFwAAKqamprSrl27KuZ27dqlqampiBIh7p544omqY2Axfvu3f7vqGACiwsls4blg4cLM1plZYGZFMztuZh8tz19lZoNm9mz57ZWXPi4AYLk1Nzfr0KFDFXOHDh1Sc3NzRIkQdzfddFPVMbAYv/d7v1d1DABR4WS28DRexMfMSLrD3b9tZldI+paZDUr6DUlD7r7PzPZK2iup59JF
BQBEYefOnerpOfvyvnHjRh04cEA9PT3n7MIALkZjY6NeeuklXXXVVfr93/99/eqv/qpeeuklNTZezCUJcH70tABQj7LZrG6++WatXr1af/mXf6nrr79er7zyij75yU9GHS12LniV4O4/kvSj8vunzawo6acl7ZDUUf6wByWNiMIFAKw4fX19kqS7775bU1NTam5u1q5du+bngcWYnp5WU1OTXnrpJe3cuVPS2WLG9PR0xMkQR7t379b9999/3nkAqCf04FmaRfW4MLP1kt4h6c8kXVMuaswVN/5B2OEAAPWhr69Pk5OTCoJAk5OTFC2wJNPT03J3BUEgd6dogZodPnxY9913X8V6uu+++2geDKAu5HI5PfTQQzpx4oSGh4d14sQJPfTQQzTnrMFF78s0szWSvizpt9z9by92S56ZfUTSRyTpmmuu0cjISA0xLy8TExP8PyFUrCeEhdcnhIn1hKWamprSxo0bNTIyMr+eNm7cqKmpKdYWlow1hKUqFosqlUoVr1GlUknFYpH1tUgXVbgwsyadLVp8zt3/uDz9f83sWnf/kZldK+mF8/1bd/+UpE9J0ubNm72jo2PpqVe4kZER8f+E0HztCOsJoeH1CWFiPWGpmpub9cwzz2jPnj3z6+nAgQNqbm5mbWFpuH5CCJLJpBKJhDo6OuZfo4IgUDKZZH0t0gULF3Z2a0VeUtHdDyz4qwFJH5a0r/y2/5IkBAAAAM6D5sEA6lk2m1Umk1E+n1epVFIQBMpkMtwqUoOL2XHxbkn/StKYmX2nPHe3zhYsvmhmGUl/KelDlyYiAABYSc53uylNy1CLvr4+PfDAA7rjjjvm51avXk0fHgB1IZ1OS5K6u7tVLBaVTCaVy+Xm53HxLtic091H3d3cvc3d317+86i7v+june7+1vLbU8sRGAAAxNdc0aKhoUH33nuvGhoaKuaBxZg7WnDLli360pe+pC1btuiVV17R9ddfH3U0AJB0tngxPj6uoaEhjY+PU7So0aJOFQEAAFiqhoYGlUolbd68WaVSab54ASzWyZMntWXLFj355JO6+uqr9eSTT2rLli06efJk1NEAACHiSgEAACyrxx9/vOoYWIyHH3646hgAEH8ULgAAwLLavn171TGwGB/84AerjgEA8UfhAgBwQYVCQa2trers7FRra6sKhULUkRBjs7OzMjOlUimZmWZnZ6OOhJhat26dnnrqqYr19NRTT2ndunVRRwMASVJbW1vFa1RbW1vUkWLpYk4VAQBcxgqFgrLZ7PxRXolEQplMRpJoMAUgUq/Xy4IeFwDqQVtbm8bGxtTV1aVbb71VDzzwgAYGBtTW1qZjx45FHS9W2HEBAKgql8spn88rlUqpsbFRqVRK+XyeM8ixJO6uIAg4BhWhYD0BqEdzRYv+/n6tXbtW/f396urq0tjYWNTRYocdF0CIfvZ3H9dPzkxHHeMc6/ceiTpChTeuatJ3f4d72uOiWCyqvb29Yq69vV3FYjGiRIi7t73tbeeMv//970eUBnH3K7/yK+eMH3nkkYjSAEClfD5/zvhNb3pTRGnii8IFEKKfnJnWc/veH3WMCiMjI+ro6Ig6RoV6K6SgumQyqdHRUaVSqfm50dFRJZPJCFMhzl5bpKBogaV4bZGCogWAepLJZNTf318xxuJxqwgAoKpsNqtMJqMgCDQzM6MgCJTJZJTNZqOOhhgzM6XTaZlZ1FGwApiZenp6WE8A6sqmTZs0MDCgHTt26OWXX9aOHTs0MDCgTZs2RR0tdthxAQCoaq4BZ3d3t4rFopLJpHK5HI05URN3n//h8sc//nHFPLBYC9fTN77xjYp5AIjasWPH1NbWpoGBAQ0MDEg6W8ygMefiseMCAHBB6XRa4+PjGhoa0vj4OEULLIm7VzRT5IdM1KpQKGjDhg0aHh7W4OCghoeHtWHDBo5sBlA3jh07VvE9j6JFbShcAAAAIJY49QgALg8ULgAAABBLnHoEAJcHChcAAACIpblT
jxbi1CMAWHkoXAAALqi7u1stLS1KpVJqaWlRd3d31JEQYw0NDTIzpVIpmZkaGrgcQW2y2ay2bt1asZ62bt3KqUcA6kahUFBra6s6OzvV2tpKD54acaoIAKCq7u5uHTp0SL29vdq4caOeeeYZ9fT0SJL6+voiToe4aWhokLurpaVF+/fv15133qnJyUk1NDRodnY26niImVtuueV152kiDCBqhUJB2WxW+XxepVJJiURCmUxGkniNWiR+xQEAqOrw4cPq7e3Vnj171NLSoj179qi3t1eHDx+OOhpiaK5ocebMGd144406c+aMWlpaOFkES7KwYz8A1AsaCIeHHRcAgKqmpqa0a9euirldu3bpjjvuiCgR4m5kZOSc8bve9a5owiD2XtvPIplM0pwzZn72dx/XT85MRx3jHOv3Hok6QoU3rmrSd39ne9QxsAg0EA4PhQsAQFXNzc06dOiQ9uzZMz936NAhNTc3R5gKcdbR0aEzZ85UjIFavfYHAH4giJ+fnJnWc/veH3WMCiMjI3X32lRvhRRc2FwD4VQqNT9HA+HacKsIAKCqnTt3qqenRwcOHNDk5KQOHDignp4e7dy5M+poiCEz0+TkpFatWqXjx49r1apVmpyclJlFHQ0xZmb69V//ddYRgLqSzWaVyWQUBIFmZmYUBIEymQwNhGvAjgsAQFVzDTjvvvtuTU1Nqbm5Wbt27aIxJ2oyOzurhoYGTU5Oavfu3ZLO/tBJY07Uwt3nixU//OEPK+YBIGpzDTi7u7tVLBaVTCaVy+VozFkDdlwAAC6or69Pk5OTCoJAk5OTFC2wJLOzsxXNFClaoFbd3d1qbGzUfffdp8cee0z33XefGhsbObIZQN1Ip9MaHx/X0NCQxsfHKVrUiMIFAAAAYolTjwDg8kDhAgAAALH0eqceTU1NRZQIAHApULgAAABALM2derQQpx4BwMpDc04AAADE0s6dO3XnnXfq4x//uEqlkhKJhGZnZ/Wbv/mbUUcDAISIHRcAAAAAAKBuUbgAAABALB0+fFj79+/XzMyMgiDQzMyM9u/fT3NOAFhhuFUECNEVyb3a9ODeqGOc68GoA1S6IilJ7486BgAg5l6vOecdd9wRUSIAwKVA4QII0eniPj23r75+IB8ZGVFHR0fUMSqs33sk6ggAgBVgrjnnnj175udozgkAKw+FCwAAAMTSzp071dPTI0nauHGjDhw4oJ6ennN2YQAA4o3CBQAAAGKpr69P3//+93XnnXfK3WVm2rZtm/r6+qKOBgAIEc05AQAAEEuFQkHPPvushoaGNDg4qKGhIT377LMqFApRRwMAhIjCBQAAAGIpl8spn88rlUqpsbFRqVRK+XxeuVwu6mgAgBBRuAAAAEAsFYtFtbe3V8y1t7erWCxGlAgAcClQuAAAAEAsJZNJjY6OVsyNjo4qmUxGlAgAcClQuAAAAMuqUCiotbVVnZ2dam1tpR8BapbNZtXZ2SkzUyqVkpmps7NT2Ww26mgAgBBxqggAAFg2hUJB2WxW+XxepVJJiURCmUxGkpROpyNOh7jZvXu33F1NTU2anp6ef7t7927WEwCsIOy4AAAAy4ZmigjTqVOndOONN+rVV19VEAR69dVXdeONN+rUqVNRRwMAhIjCBQAAWDY0U0TYHn300apjAED8UbgAAADLhmaKCNv73ve+qmMAQPxRuAAAAMsmm80qk8koCALNzMwoCAJlMhmaKaImV111lY4fP67W1lb9+Mc/Vmtrq44fP66rrroq6mgAgBDRnBMAACybdDqtW265RVu3bj1nHlisF198UYlEQsePH59fQw0NDXrxxRcjTgYACBM7LgAAwLIxM0lSU1OTPvnJT6qpqaliHliMtrY2zc7OqqurS4888oi6uro0Ozurtra2qKMBAEJE4QIAACyrpqYmvfrqq2pra9Orr746X7wAFmtsbExdXV3q7+/X2rVr1d/fr66uLo2NjUUdDQAQIgoXAABgWQVBUHUMLEY+
n686BgDEH4ULAACwrFKpVNUxsBiZTKbqGAAQfxcsXJjZZ8zsBTMbXzB3lZkNmtmz5bdXXtqYAIAoFQoFtba2qrOzU62trSoUClFHQoxNT0/LzJRKpWRmmp6ejjoSYmrTpk0aGBioWE8DAwPatGlT1NEAACG6mFNFPivpfkl/uGBur6Qhd99nZnvL457w4wEAolYoFJTNZpXP51UqlZRIJOZ/o8lJEFisTZs2nbf/AD9oohav18uCHhcAsLJccMeFu39d0qnXTO+Q9GD5/QclfSDkXACAOpHL5ZTP55VKpdTY2KhUKqV8Pq9cLhd1NMTQXDNFd1cQBHJ3miliyRauJwDAynMxOy7O5xp3/5EkufuPzOwfvN4HmtlHJH1Ekq655hqNjIzU+JSXj4mJCf6fYqzevnb1up7qMRPOr1gsqlQqaWRkZH49lUolFYtFvo6oya233lqxnm699VYNDAywnlCzhetp4Rzio96+Xlw/IWz1uqbiotbCxUVz909J+pQkbd682Ts6Oi71U8beyMiI+H+Kqa8dqbuvXV2upzr8f8LrSyaTSiQS6ujomF9PQRAomUzydURNHnjgAfX398+vpx07dkgS6wk1W/j6tHAOMVGH1wVcPyFsdbmmYqTWU0X+r5ldK0nlty+EFwkAUE+y2awymYyCINDMzIyCIFAmk1E2m406GmJorpnijh079PLLL2vHjh00U8SSLWzOCQBYeWrdcTEg6cOS9pXf9oeWCABQV+YacHZ3d6tYLCqZTCqXy9GYEzU5duyYmpqaNDAwoIGBAUlSY2Ojjh07FnEyxJG7n7dYQa8LAFhZLuY41IKk/yXpZ8zseTPL6GzBYpuZPStpW3kMAFih0um0xsfHNTQ0pPHxcYoWqNn111+vmZkZbdmyRV/60pe0ZcsWzczM6Prrr486GmKoUChow4YNGh4e1uDgoIaHh7VhwwaObAaAFeZiThVJu/u17t7k7te5e97dX3T3Tnd/a/nta08dAQAAOMfJkye1ZcsWPfnkk7r66qv15JNPasuWLTp58mTU0RBDnHoEAJeHWntcAAAA1OThhx+uOgYuVrFYVHt7e8Vce3u7isViRIkAAJcChQsAALCsPvjBD1YdAxcrmUxqdHS0Ym50dFTJZDKiRACAS+GSH4cKAAAwZ926dXrqqae0atUqTU5OqqWlRZOTk1q3bl3U0RBD2WxWW7duPWf+85//fARpAACXCjsuAADAsunt7VUikdDk5KQkaXJyUolEQr29vREnQxzdcssti5oHAMQThQsAALBscrmcBgcH5e4KgkDursHBQZopYkkWricAwMpD4QIAACwbmikibHfeeWfVMQAg/ihcAACAZUMzRYRt//79VccAgPijcAEAAJZNNptVJpNREASamZlREATKZDLKZrNRR0OMmZn+4A/+QGYWdRQAwCXAqSIAAGDZpNNpPfXUU/qlX/olTU1Nqbm5WTt37lQ6nY46GmLI3eeLFQ8//HDFPABg5WDHBQAAWDaFQkFHjhzRY489psHBQT322GM6cuSICoVC1NEQU+5e0ZyTogUArDwULgAAwLLJ5XLK5/NKpVJqbGxUKpVSPp/nVBEAAPC6KFwAAIBlw6kiAABgsShcAACAZcOpIgAAYLEoXAAAgGXDqSIAAGCxOFUEAAAsm7nTQ7q7u1UsFpVMJpXL5ThVBDVLJBKanZ2dHzc0NKhUKkWYCAAQNnZcAACAZZVOpzU+Pq6hoSGNj49TtEDN5ooWa9as0cGDB7VmzRrNzs4qkUhEHQ0AECIKFwAAAIiluaLF6dOndcMNN+j06dPzxQsAwMpB4QIAAACx9cQTT1QdAwDij8IFAAAAYuumm26qOgYAxB+FCwDABRUKBbW2tqqzs1Otra0qFApRR0KMdXd3q6WlRalUSi0tLeru7o46EmKqoaFBExMTMjOlUimZmSYmJtTQwCUugPrANVQ4OFUEAFBVoVBQNptVPp9XqVRSIpFQJpORJJoqYtG6
u7t16NAh9fb2auPGjXrmmWfU09MjSerr64s4HeLm9XpZ0OMCQD3gGio8lKMBAFXlcjnl83mlUik1NjYqlUopn88rl8tFHQ0xdPjwYfX29mrPnj1qaWnRnj171Nvbq8OHD0cdDTHV1NQkd1cQBHJ3NTU1RR0JACRxDRUmChcAgKqKxaLa29sr5trb21UsFiNKhDibmprSrl27KuZ27dqlqampiBIh7oIgqDoGgKhwDRUeChcAgKqSyaRGR0cr5kZHR5VMJiNKhDhrbm7WoUOHKuYOHTqk5ubmiBIh7lKpVNUxAESFa6jwULgAAFSVzWaVyWQUBIFmZmYUBIEymYyy2WzU0RBDO3fuVE9Pjw4cOKDJyUkdOHBAPT092rlzZ9TREFPT09N6wxveoGPHjukNb3iDpqeno44EAJK4hgoTzTkBAFXNNY/q7u5WsVhUMplULpejqRRqMteA8+6779bU1JSam5u1a9cuGnOiJu4uM9P09LQ++tGPVswDQNS4hgoPOy4AABeUTqc1Pj6uoaEhjY+P8w0XS9LX16fJyUkFQaDJyUmKFqhZoVDQhg0bNDw8rMHBQQ0PD2vDhg0cNwigbnANFQ4KFwAAAIglOvYDwOWBwgUAAABiiY79AHB5oHABAACAWKJjPwBcHmjOCQC4oIaGhopmd2am2dnZCBMhzlhPCEs2m9XWrVvPmf/85z8fQRoAOFehUFAul5tvzpnNZulzUQN2XAAAqpr7IbOlpUX333+/Wlpa5O5qaOBbCBaP9YQw3XLLLfPvm9l55wEgKoVCQdlsVn19fTp69Kj6+vqUzWZpIFwDrhIAAFXN/ZB55swZ3XjjjTpz5sz8D5vAYrGecCm4u4aHh1lHAOoKDYTDw60iQMjW7z0SdYRzfa2+Mr1xVVPUEbBIIyMj54zf9a53RRMGscd6Qph6enrOGff29kaUBrW4IrlXmx7cG3WMcz0YdYBKVyQl6f1Rx8Ai0EA4PBQugBA9t6/+vpms33ukLnMhXjo6OnTmzJmKMVAr1hPC1Nvbq3379lWMES9jHx6LOsI5uH5CGOYaCKdSqfk5GgjXhltFAABVmZkmJye1atUqHT9+XKtWrdLk5GTF/eTAxWI94VIwMx08eJB1BKCuZLNZZTIZBUGgmZkZBUGgTCajbDYbdbTYYccFAKCq2dnZ+R82d+/eXTEPLBbrCWFy9/lixRe/+MWKeQCI2tzpId3d3fOniuRyOU4VqQE7LgAAVa1Zs0aStH79ev3RH/2R1q9fXzEPLEYikZB0dv0cPHhwfh3NzQOLsXCHxcI+Key8AFAv0um0xsfHNTQ0pPHxcYoWNaJwAQCo6pVXXtH69et14sQJXXfddTpx4oTWr1+vV155JepoiKHZ2VmtWbNGp0+f1g033KDTp09rzZo17LjAkri77rnnHnZaAMAKReECAHBBf/Inf1J1DCzGE088UXUMLMav/dqvVR0DAOKPwgUA4ILe8573VB0Di3HTTTdVHQOL8eUvf7nqGAAQfzTnBABUtXr1aj333HNqbGxUqVRSIpFQqVTS6tWro46GGGpoaNDExMQ5PQgaGvhdCmpHTwsAWNm4SgAAVHX48GFJUqlUqng7Nw8sxuvt1mEXD2qxffv2Rc0DAOKJwgUAoKpcLqfh4WG5u4IgkLtreHhYuVwu6miIocHBQd12220V6+m2227T4OBg1NEQQ6wnALg8ULgAAFRVLBbV3t5eMdfe3q5isRhRIsTZ3OkPC3EaBGrFegKAywOFCwBAVclkUqOjoxVzo6OjSiaTESVCnJmZ7rrrroq5u+66ix4FqAnrCQAuD0tqzmlmvyjpk5ISkj7t7vtCSQUAqBvZbFaZTEb5fF6lUklBECiTyXCrCGqybds2HTx4UJL0vve9T7fffrsOHjxITwLUhPUEAJeHmgsXZpaQ9AeStkl6XtI3zWzA3Z8JKxwAIHrpdFqS1N3drWKxqGQyqVwuNz8PLMbRo0f13ve+V4cOHdLBgwdl
Ztq+fbuOHj0adTTEEOsJAC4PS9lx8U5Jf+HuP5AkM/uCpB2SKFwAwAqTTqeVTqc1MjKijo6OqOMg5uZ+qGQ9IQysJwBY+ZbS4+KnJZ1cMH6+PAcAAAAAABCKpey4OF/Xo3NaOJvZRyR9RJKuueYajYyMLOEpLw8TExP8PyFUrCeEhdcnhIn1hDCxnhA21hPCxGvU0iylcPG8pHULxtdJ+qvXfpC7f0rSpyRp8+bNzha+C2OrI0L1tSOsJ4SG1yeEifWEMLGeECqunxAyXqOWZimFi29KequZbZD0Q0n/QtItoaQCAAAALsL5jj51P2cTMAAgxmouXLj7jJntlnRUZ49D/Yy7Hw8tGQAAAFDF+YoWc/MULwBg5VhKc065+6Pu/jZ3f7O758IKBQAAAFwsd1cQBBQrAGCFWsqtIivCpgc3RR3h/B6MOsC5xj48FnWEy8br/Qap5sfrDe+xuChcPrw+XTxeny6M9XTxWE9AfIV5DcX1U3zxPe/ixeV73mVfuKjHLxSNWxDmNzfWU3zx+oQwsZ4AXA7Cuobi9Sne+J638lz2hQsAAADEW9g7FQEA9WVJPS4AAACAqLzeb9fZlg8AKwuFCwAAAMSWu1c056RoAQArD4ULAAAAAABQtyhcAAAAAACAukXhAgAAAAAA1C0KFwAAAAAAoG5RuAAAAAAAAHWLwgUAAAAAAKhbFC4AAAAAAEDdonABAAAAAADqFoULAAAAAABQtyhcAAAAAACAukXhAgAAAAAA1C0KFwAAAAAAoG5RuAAAAAAAAHWLwgUAAAAAAKhbFC4AAAAAAEDdMndfvicz+2tJ/2fZnjC+rpb0N1GHwIrBekKYWE8IE+sJYWI9IUysJ4SNNXVx/pG7v+m1k8tauMDFMbOn3X1z1DmwMrCeECbWE8LEekKYWE8IE+sJYWNNLQ23igAAAAAAgLpF4QIAAAAAANQtChf16VNRB8CKwnpCmFhPCBPrCWFiPSFMrCeEjTW1BPS4AAAAAAAAdYsdFwCDOGmGAAAIEklEQVQAAAAAoG5RuKgjZvaLZvbnZvYXZrY36jyINzP7jJm9YGbjUWdB/JnZOjMLzKxoZsfN7KNRZ0J8mVmLmX3DzL5bXk+/G3UmxJ+ZJczsf5vZV6POgngzs+fMbMzMvmNmT0edB/FmZmvN7GEz+175OuqfR50pjrhVpE6YWULS9yVtk/S8pG9KSrv7M5EGQ2yZ2S9ImpD0h+7eGnUexJuZXSvpWnf/tpldIelbkj7AaxRqYWYmabW7T5hZk6RRSR919z+NOBpizMz2SNos6e+5+y9HnQfxZWbPSdrs7n8TdRbEn5k9KOl/uvunzewNkn7K3V+OOlfcsOOifrxT0l+4+w/c/VVJX5C0I+JMiDF3/7qkU1HnwMrg7j9y92+X3z8tqSjpp6NNhbjysybKw6byH36TgpqZ2XWS3i/p01FnAYA5Zvb3JP2CpLwkufurFC1qQ+Gifvy0pJMLxs+LHwoA1CEzWy/pHZL+LNokiLPytv7vSHpB0qC7s56wFP9V0sclzUYdBCuCS3rczL5lZh+JOgxi7R9L+mtJD5RvZfu0ma2OOlQcUbioH3aeOX77BKCumNkaSV+W9Fvu/rdR50F8uXvJ3d8u6TpJ7zQzbmlDTczslyW94O7fijoLVox3u/vPSfolSb9Zvv0WqEWjpJ+TdNDd3yHpFUn0MqwBhYv68bykdQvG10n6q4iyAMA5yr0Ivizpc+7+x1HnwcpQ3jI7IukXI46C+Hq3pK5yX4IvSNpqZv8j2kiIM3f/q/LbFyQ9orO3dAO1eF7S8wt2FT6ss4UMLBKFi/rxTUlvNbMN5aYt/0LSQMSZAEDSfDPFvKSiux+IOg/izczeZGZry++vkvQeSd+LNhXiyt3vcvfr3H29zl4/Dbv7r0ccCzFlZqvLTahV3tK/XRIntKEm7v5jSSfN7GfKU52SaGxeg8aoA+Asd58xs92SjkpKSPqMux+POBZizMwKkjok
XW1mz0v6HXfPR5sKMfZuSf9K0li5L4Ek3e3uj0aYCfF1raQHyydqNUj6ortzhCWAenCNpEfO1uvVKOnz7v61aCMh5rolfa78y+kfSLo14jyxxHGoAAAAAACgbnGrCAAAAAAAqFsULgAAAAAAQN2icAEAAAAAAOoWhQsAAAAAAFC3KFwAAAAAAIC6ReECAAAAAADULQoXAACsAGY2EfHz/4aZ/cMF40+b2cYQH/8/mdkPzew7ZvaMmaXDeuwFz/GcmV1tZmvN7PawHx8AANSGwgUAAAjDb0iaL1y4+79192dCfo7/4u5vl7RD0n83s6aQH3/OWkkULgAAqBMULgAAWEHMrMPMRszsYTP7npl9zsys/Hf7yrsVjpnZ/vLcZ83skJn9TzP7vpn9cnk+YWb3mtk3yx//7xY8x8fNbMzMvlt+zA9K2izpc+UdEavKGTaXPz5d/vhxM+td8DgTZpYrP86fmtk1F/M5uvuzkv5O0pXlx3mzmX3NzL5V/jxuKM9/qPyc3zWzr5fnfsPM7l+Q4atm1vGap9gn6c3lz+VeM7vWzL5eHo+b2c8v6osCAACWpDHqAAAAIHTvkHSjpL+S9KSkd5vZM5J+RdIN7u5mtnbBx6+XdJOkN0sKzOwtkv61pJ+4+z81s2ZJT5rZ45JukPQBSf/M3f/OzK5y91NmtlvSne7+tCSVayUq3z7SK+mfSHpJ0uNm9gF3/4qk1ZL+1N2zZvb7knZK+s8X+uTM7OckPevuL5SnPiVpl7s/a2b/TNJ/k7RV0n+U9F53/+FrPt8L2Suptby7Q2Z2h6Sj7p4zs4Skn1rEYwEAgCWicAEAwMrzDXd/XpLM7Ds6W5j4U0mTkj5tZkckfXXBx3/R3WclPWtmP9DZ4sR2SW3l3RSS9EZJb5X0HkkPuPvfSZK7n7pAln8qacTd/7qc53OSfkHSVyS9uiDHtyRtu8Bj/Xsz2ynpH0v6xfLjrZG0RdKX5oolkprLb5+U9Fkz+6KkP77AY1fzTUmfKd+a8hV3/84SHgsAACwSt4oAALDyTC14vySp0d1nJL1T0pd1dsfE1xZ8jL/m37skk9Tt7m8v/9ng7o+X51/78dVYlb+bdve5xyrpwr9Q+S/u/jOSbpb0h2bWorPXMi8vyPl2d09KkrvvkvQfJK2T9B0z+/uSZlR5/dNyoU/A3b+us8WWH0r6IzP71xf6NwAAIDwULgAAuAyUdya80d0flfRbkt6+4K8/ZGYNZvZmnd3N8OeSjkq6ba4Bppm9zcxWS3pc0r8xs58qz19VfozTkq44z1P/maSbyqd1JCSlJT2xlM/F3f9Y0tOSPuzufyvphJl9qJzHzOxny++/2d3/zN3/o6S/0dkCxnOS3l7+fNfpbDHntSo+FzP7R5JecPfDkvKSfm4p+QEAwOJwqwgAAJeHKyT1l3cpmKR/v+Dv/lxniwnX6GyviEkz+7TO3mLy7XJzz7+W9AF3/5qZvV3S02b2qqRHJd0t6bOSDpnZGUn/fO6B3f1HZnaXpKD8vI+6e38In88nJH3ezA5L+peSDprZf5DUJOkLkr4r6V4ze2v5eYfKc5J0QtKYpHFJ337tA7v7i2b2pJmNS3qs/HEfM7NpSRM62/8DAAAsE/v/OzQBAMDlxsw+K+mr7v5w1FkAAADOh1tFAAAAAABA3WLHBQAAqBtmlpX0oddMf8ndc1HkAQAA0aNwAQAAAAAA6ha3igAAAAAAgLpF4QIAAAAAANQtChcAAAAAAKBuUbgAAAAAAAB1i8IFAAAAAACoW/8PB5gKK98Bq0gAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1296x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "train.fillna(-1).boxplot(column='SectionViolations',by='Inspection_Results',figsize=(18,8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(196591, 15)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.concat([train,test],sort=False).reset_index(drop=True)\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>LicenseNo</th>\n",
       "      <th>FacilityID</th>\n",
       "      <th>FacilityName</th>\n",
       "      <th>Type</th>\n",
       "      <th>Street</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>LocationID</th>\n",
       "      <th>Reason</th>\n",
       "      <th>SectionViolations</th>\n",
       "      <th>RiskLevel</th>\n",
       "      <th>Geo_Loc</th>\n",
       "      <th>Inspection_Results</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>DayOfMonth</th>\n",
       "      <th>Weekday</th>\n",
       "      <th>DayOfYear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>34750819963711</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>9030</td>\n",
       "      <td>16992</td>\n",
       "      <td>16219</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>13893</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81879.0</td>\n",
       "      <td>LICENSE RE-INSPECTION</td>\n",
       "      <td>NaN</td>\n",
       "      <td>High</td>\n",
       "      <td>locid12327</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2009</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49233286112625</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>37048</td>\n",
       "      <td>24121</td>\n",
       "      <td>22999</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>14398</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81856.0</td>\n",
       "      <td>CANVASS</td>\n",
       "      <td>18.0</td>\n",
       "      <td>High</td>\n",
       "      <td>locid9073</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2009</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11012510134565</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>11750</td>\n",
       "      <td>16348</td>\n",
       "      <td>15593</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>17379</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81876.0</td>\n",
       "      <td>LICENSE</td>\n",
       "      <td>18.0</td>\n",
       "      <td>High</td>\n",
       "      <td>locid16811</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2009</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>18328605468650</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>11244</td>\n",
       "      <td>26886</td>\n",
       "      <td>24377</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>3591</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81858.0</td>\n",
       "      <td>LICENSE RE-INSPECTION</td>\n",
       "      <td>NaN</td>\n",
       "      <td>High</td>\n",
       "      <td>locid14197</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2009</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24257686822403</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>2728</td>\n",
       "      <td>7367</td>\n",
       "      <td>7014</td>\n",
       "      <td>RESTAURANT</td>\n",
       "      <td>72</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81846.0</td>\n",
       "      <td>TAG REMOVAL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Medium</td>\n",
       "      <td>locid7590</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2009</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               ID       Date  LicenseNo  FacilityID  FacilityName        Type  \\\n",
       "0  34750819963711  1231027.2       9030       16992         16219  RESTAURANT   \n",
       "1  49233286112625  1231027.2      37048       24121         22999  RESTAURANT   \n",
       "2  11012510134565  1231027.2      11750       16348         15593  RESTAURANT   \n",
       "3  18328605468650  1231027.2      11244       26886         24377  RESTAURANT   \n",
       "4  24257686822403  1231027.2       2728        7367          7014  RESTAURANT   \n",
       "\n",
       "   Street         City       State  LocationID                 Reason  \\\n",
       "0   13893  id-11235901  id_1890134     81879.0  LICENSE RE-INSPECTION   \n",
       "1   14398  id-11235901  id_1890134     81856.0                CANVASS   \n",
       "2   17379  id-11235901  id_1890134     81876.0                LICENSE   \n",
       "3    3591  id-11235901  id_1890134     81858.0  LICENSE RE-INSPECTION   \n",
       "4      72  id-11235901  id_1890134     81846.0            TAG REMOVAL   \n",
       "\n",
       "   SectionViolations RiskLevel     Geo_Loc  Inspection_Results  Year  Month  \\\n",
       "0                NaN      High  locid12327                 4.0  2009      1   \n",
       "1               18.0      High   locid9073                 1.0  2009      1   \n",
       "2               18.0      High  locid16811                 1.0  2009      1   \n",
       "3                NaN      High  locid14197                 NaN  2009      1   \n",
       "4                NaN    Medium   locid7590                 4.0  2009      1   \n",
       "\n",
       "   DayOfMonth  Weekday  DayOfYear  \n",
       "0           4        6          4  \n",
       "1           4        6          4  \n",
       "2           4        6          4  \n",
       "3           4        6          4  \n",
       "4           4        6          4  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.Date = pd.to_datetime(data.Date.apply(lambda x: x.replace('29-02','28-02')+' 12:00:00' if ('29-02' in x) else x+' 00:00:00'), format='%d-%m-%Y %H:%M:%S')\n",
    "data['Year'] = data.Date.dt.year\n",
    "data['Month'] = data.Date.dt.month\n",
    "data['DayOfMonth'] = data.Date.dt.day\n",
    "data['Weekday'] = data.Date.dt.dayofweek\n",
    "data['DayOfYear'] = data.Date.dt.dayofyear\n",
    "data.Date = data.Date.astype(int)/1e12\n",
    "data = data.sort_values('Date').reset_index(drop=True)\n",
    "data[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>196591</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <td>0</td>\n",
       "      <td>float64</td>\n",
       "      <td>2522</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LicenseNo</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>37395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FacilityID</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>27277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FacilityName</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>26033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Type</th>\n",
       "      <td>4700</td>\n",
       "      <td>object</td>\n",
       "      <td>435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Street</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>17892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>City</th>\n",
       "      <td>0</td>\n",
       "      <td>object</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>State</th>\n",
       "      <td>0</td>\n",
       "      <td>object</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LocationID</th>\n",
       "      <td>50</td>\n",
       "      <td>float64</td>\n",
       "      <td>112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Reason</th>\n",
       "      <td>0</td>\n",
       "      <td>object</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SectionViolations</th>\n",
       "      <td>52044</td>\n",
       "      <td>float64</td>\n",
       "      <td>62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RiskLevel</th>\n",
       "      <td>0</td>\n",
       "      <td>object</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Geo_Loc</th>\n",
       "      <td>0</td>\n",
       "      <td>object</td>\n",
       "      <td>16812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Inspection_Results</th>\n",
       "      <td>49148</td>\n",
       "      <td>float64</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Year</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Month</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DayOfMonth</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Weekday</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DayOfYear</th>\n",
       "      <td>0</td>\n",
       "      <td>int64</td>\n",
       "      <td>364</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        0        1       2\n",
       "ID                      0    int64  196591\n",
       "Date                    0  float64    2522\n",
       "LicenseNo               0    int64   37395\n",
       "FacilityID              0    int64   27277\n",
       "FacilityName            0    int64   26033\n",
       "Type                 4700   object     435\n",
       "Street                  0    int64   17892\n",
       "City                    0   object       2\n",
       "State                   0   object       2\n",
       "LocationID             50  float64     112\n",
       "Reason                  0   object      17\n",
       "SectionViolations   52044  float64      62\n",
       "RiskLevel               0   object       4\n",
       "Geo_Loc                 0   object   16812\n",
       "Inspection_Results  49148  float64       7\n",
       "Year                    0    int64      10\n",
       "Month                   0    int64      12\n",
       "DayOfMonth              0    int64      31\n",
       "Weekday                 0    int64       7\n",
       "DayOfYear               0    int64     364"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column overview: missing-value count, dtype and number of distinct values.\n",
    "pd.concat(\n",
    "    [data.isna().sum(), data.dtypes, data.nunique()],\n",
    "    axis=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward minus backward running average of SectionViolations inside each FacilityID:\n",
    "# running sum (NaN results -> 0) divided by the 1-based position in the group.\n",
    "# The backward part is computed on the row-reversed frame; pandas aligns both\n",
    "# operands on the row index when subtracting.\n",
    "data['cummean_sectionViolations_diff'] = (\n",
    "    data.groupby('FacilityID')['SectionViolations'].cumsum().fillna(0)\n",
    "    / (data.groupby('FacilityID')['SectionViolations'].cumcount() + 1)\n",
    "    - data[::-1].groupby('FacilityID')['SectionViolations'].cumsum().fillna(0)\n",
    "    / (data[::-1].groupby('FacilityID')['SectionViolations'].cumcount() + 1)\n",
    ")\n",
    "\n",
    "# Date gap to the row two positions later within the same LicenseNo (-10 where missing).\n",
    "# NOTE(review): the name suggests the *next* row (shift(-1)) but shift(-2) is used -- confirm.\n",
    "data['nxt_inspection'] = data['Date'].sub(data.groupby('LicenseNo')['Date'].shift(-2)).fillna(-10)\n",
    "\n",
    "# Number of rows that share the same Date value.\n",
    "data['cnt_date'] = data['Date'].map(data.groupby('Date').size())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns removed from the feature frame later in the notebook.\n",
    "cols_to_drop = ['ID','LicenseNo','FacilityID','FacilityName','LocationID','City','State']\n",
    "# Columns that are later cast to the pandas 'category' dtype.\n",
    "cat_cols = ['Reason','SectionViolations','RiskLevel']\n",
    "\n",
    "# Record missingness BEFORE imputing: once the NaNs have been replaced by -1,\n",
    "# isna() is False everywhere and the indicator columns would be constant 0.\n",
    "data['isNull_SectionViolations'] = data['SectionViolations'].isna().astype(int)\n",
    "data['isNull_LocationID'] = data['LocationID'].isna().astype(int)\n",
    "data['isNull_Type'] = data['Type'].isna().astype(int)\n",
    "\n",
    "# Impute missing values with the sentinel -1 (Type is left untouched here).\n",
    "data['SectionViolations'] = data['SectionViolations'].fillna(-1)\n",
    "data['LocationID'] = data['LocationID'].fillna(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group sizes: number of rows sharing the same key (ID only serves as the counted column).\n",
    "# The cc_* columns are the 0-based running position of the row inside its group.\n",
    "data['n_date'] = data.groupby('Date')['ID'].transform('count')\n",
    "data['n_license'] = data.groupby('LicenseNo')['ID'].transform('count')\n",
    "data['cc_license'] = data.groupby('LicenseNo')['ID'].cumcount()\n",
    "data['n_facility'] = data.groupby('FacilityID')['ID'].transform('count')\n",
    "data['cc_facility'] = data.groupby('FacilityID')['ID'].cumcount()\n",
    "data['n_street'] = data.groupby('Street')['ID'].transform('count')\n",
    "data['n_city'] = data.groupby('City')['ID'].transform('count')\n",
    "data['n_location'] = data.groupby('LocationID')['ID'].transform('count')\n",
    "data['n_city_date'] = data.groupby(['City','Date'])['ID'].transform('count')\n",
    "data['n_state'] = data.groupby('State')['ID'].transform('count')\n",
    "\n",
    "# Number of distinct values of one column inside each group of another.\n",
    "data['n_facilityNameInFacilityID'] = data.FacilityID.map(data.groupby('FacilityID')['FacilityName'].nunique())\n",
    "data['n_licenseInLocationID'] = data.LocationID.map(data.groupby('LocationID')['LicenseNo'].nunique())\n",
    "data['n_geolocInLocationID'] = data.LocationID.map(data.groupby('LocationID')['Geo_Loc'].nunique())\n",
    "\n",
    "# Group sizes per (Date, key) pair.\n",
    "data['n_date_facilityID'] = data.groupby(['Date','FacilityID'])['ID'].transform('count')\n",
    "data['n_date_license'] = data.groupby(['Date','LicenseNo'])['ID'].transform('count')\n",
    "data['n_date_street'] = data.groupby(['Date','Street'])['ID'].transform('count')\n",
    "data['n_date_geoloc'] = data.groupby(['Date','Geo_Loc'])['ID'].transform('count')\n",
    "\n",
    "# Frequency-encode Geo_Loc, Street and Type in place (NaN results are replaced by 1).\n",
    "# This has to come last: the features above group on / count the raw values, which\n",
    "# are lost as soon as these columns are overwritten with their frequencies.\n",
    "data['Geo_Loc'] = data.groupby('Geo_Loc')['ID'].transform('count').fillna(1)\n",
    "data['Street'] = data.groupby('Street')['ID'].transform('count').fillna(1)\n",
    "data['Type'] = data.groupby('Type')['ID'].transform('count').fillna(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>LicenseNo</th>\n",
       "      <th>FacilityID</th>\n",
       "      <th>FacilityName</th>\n",
       "      <th>Type</th>\n",
       "      <th>Street</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>LocationID</th>\n",
       "      <th>...</th>\n",
       "      <th>n_date_street</th>\n",
       "      <th>n_date_geoloc</th>\n",
       "      <th>date_diff_license</th>\n",
       "      <th>date_diff_license2</th>\n",
       "      <th>date_diff_facility</th>\n",
       "      <th>cc_date</th>\n",
       "      <th>cc_date_street</th>\n",
       "      <th>cc_date_location</th>\n",
       "      <th>cc_date_type</th>\n",
       "      <th>cc_date_facility</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>34750819963711</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>9030</td>\n",
       "      <td>16992</td>\n",
       "      <td>16219</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>16</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81879.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49233286112625</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>37048</td>\n",
       "      <td>24121</td>\n",
       "      <td>22999</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>20</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81856.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11012510134565</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>11750</td>\n",
       "      <td>16348</td>\n",
       "      <td>15593</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>2</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81876.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>18328605468650</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>11244</td>\n",
       "      <td>26886</td>\n",
       "      <td>24377</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>27</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81858.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24257686822403</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>2728</td>\n",
       "      <td>7367</td>\n",
       "      <td>7014</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>312</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81846.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 51 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               ID       Date  LicenseNo  FacilityID  FacilityName      Type  \\\n",
       "0  34750819963711  1231027.2       9030       16992         16219  131182.0   \n",
       "1  49233286112625  1231027.2      37048       24121         22999  131182.0   \n",
       "2  11012510134565  1231027.2      11750       16348         15593  131182.0   \n",
       "3  18328605468650  1231027.2      11244       26886         24377  131182.0   \n",
       "4  24257686822403  1231027.2       2728        7367          7014  131182.0   \n",
       "\n",
       "   Street         City       State  LocationID  ... n_date_street  \\\n",
       "0      16  id-11235901  id_1890134     81879.0  ...             1   \n",
       "1      20  id-11235901  id_1890134     81856.0  ...             1   \n",
       "2       2  id-11235901  id_1890134     81876.0  ...             2   \n",
       "3      27  id-11235901  id_1890134     81858.0  ...             2   \n",
       "4     312  id-11235901  id_1890134     81846.0  ...             1   \n",
       "\n",
       "   n_date_geoloc date_diff_license  date_diff_license2  date_diff_facility  \\\n",
       "0              1             -10.0               -10.0               -10.0   \n",
       "1              1             -10.0               -10.0               -10.0   \n",
       "2              1             -10.0               -10.0               -10.0   \n",
       "3              2             -10.0               -10.0               -10.0   \n",
       "4              1             -10.0               -10.0               -10.0   \n",
       "\n",
       "   cc_date  cc_date_street  cc_date_location  cc_date_type  cc_date_facility  \n",
       "0        7               1                 1             7                 1  \n",
       "1        7               1                 1             7                 1  \n",
       "2        7               2                 1             7                 1  \n",
       "3        7               2                 2             7                 2  \n",
       "4        7               1                 1             7                 1  \n",
       "\n",
       "[5 rows x 51 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Date gap to the previous row (and the one before it) within the same LicenseNo /\n",
    "# FacilityID; -10 where the difference is missing.\n",
    "data['date_diff_license'] = data['Date'].sub(data.groupby('LicenseNo')['Date'].shift(1)).fillna(-10)\n",
    "data['date_diff_license2'] = data['Date'].sub(data.groupby('LicenseNo')['Date'].shift(2)).fillna(-10)\n",
    "data['date_diff_facility'] = data['Date'].sub(data.groupby('FacilityID')['Date'].shift(1)).fillna(-10)\n",
    "\n",
    "# Running total of per-Date row counts (accumulated in sorted Date order), mapped back onto each row.\n",
    "data['cc_date'] = data['Date'].map(data.groupby('Date').size().cumsum())\n",
    "\n",
    "# NOTE(review): despite the 'cc_' prefix (used for cumcount elsewhere in this notebook)\n",
    "# these are plain group sizes via transform('count') -- confirm whether cumcount was meant.\n",
    "# Street and Type were overwritten with frequency counts in an earlier cell, so the\n",
    "# groups below are keyed on those counts rather than on the raw values.\n",
    "data['cc_date_street'] = data.groupby(['Date', 'Street'])['ID'].transform('count')\n",
    "data['cc_date_location'] = data.groupby(['Date', 'LocationID'])['ID'].transform('count')\n",
    "data['cc_date_type'] = data.groupby(['Date', 'Type'])['ID'].transform('count')\n",
    "data['cc_date_facility'] = data.groupby(['Date', 'FacilityID'])['ID'].transform('count')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>LicenseNo</th>\n",
       "      <th>FacilityID</th>\n",
       "      <th>FacilityName</th>\n",
       "      <th>Type</th>\n",
       "      <th>Street</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>LocationID</th>\n",
       "      <th>...</th>\n",
       "      <th>n_date_street</th>\n",
       "      <th>n_date_geoloc</th>\n",
       "      <th>date_diff_license</th>\n",
       "      <th>date_diff_license2</th>\n",
       "      <th>date_diff_facility</th>\n",
       "      <th>cc_date</th>\n",
       "      <th>cc_date_street</th>\n",
       "      <th>cc_date_location</th>\n",
       "      <th>cc_date_type</th>\n",
       "      <th>cc_date_facility</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>34750819963711</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>9030</td>\n",
       "      <td>16992</td>\n",
       "      <td>16219</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>16</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81879.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49233286112625</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>37048</td>\n",
       "      <td>24121</td>\n",
       "      <td>22999</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>20</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81856.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11012510134565</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>11750</td>\n",
       "      <td>16348</td>\n",
       "      <td>15593</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>2</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81876.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>18328605468650</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>11244</td>\n",
       "      <td>26886</td>\n",
       "      <td>24377</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>27</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81858.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24257686822403</td>\n",
       "      <td>1231027.2</td>\n",
       "      <td>2728</td>\n",
       "      <td>7367</td>\n",
       "      <td>7014</td>\n",
       "      <td>131182.0</td>\n",
       "      <td>312</td>\n",
       "      <td>id-11235901</td>\n",
       "      <td>id_1890134</td>\n",
       "      <td>81846.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 51 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               ID       Date  LicenseNo  FacilityID  FacilityName      Type  \\\n",
       "0  34750819963711  1231027.2       9030       16992         16219  131182.0   \n",
       "1  49233286112625  1231027.2      37048       24121         22999  131182.0   \n",
       "2  11012510134565  1231027.2      11750       16348         15593  131182.0   \n",
       "3  18328605468650  1231027.2      11244       26886         24377  131182.0   \n",
       "4  24257686822403  1231027.2       2728        7367          7014  131182.0   \n",
       "\n",
       "   Street         City       State  LocationID  ... n_date_street  \\\n",
       "0      16  id-11235901  id_1890134     81879.0  ...             1   \n",
       "1      20  id-11235901  id_1890134     81856.0  ...             1   \n",
       "2       2  id-11235901  id_1890134     81876.0  ...             2   \n",
       "3      27  id-11235901  id_1890134     81858.0  ...             2   \n",
       "4     312  id-11235901  id_1890134     81846.0  ...             1   \n",
       "\n",
       "  n_date_geoloc date_diff_license  date_diff_license2  date_diff_facility  \\\n",
       "0             1             -10.0               -10.0               -10.0   \n",
       "1             1             -10.0               -10.0               -10.0   \n",
       "2             1             -10.0               -10.0               -10.0   \n",
       "3             2             -10.0               -10.0               -10.0   \n",
       "4             1             -10.0               -10.0               -10.0   \n",
       "\n",
       "   cc_date  cc_date_street  cc_date_location  cc_date_type  cc_date_facility  \n",
       "0        7               1                 1             7                 1  \n",
       "1        7               1                 1             7                 1  \n",
       "2        7               2                 1             7                 1  \n",
       "3        7               2                 2             7                 2  \n",
       "4        7               1                 1             7                 1  \n",
       "\n",
       "[5 rows x 51 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cast every column listed in cat_cols to the pandas 'category' dtype.\n",
    "for cat_col in cat_cols:\n",
    "    data[cat_col] = data[cat_col].astype('category')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reason of the neighbouring rows (1 and 2 positions before / after) inside the\n",
    "# same LicenseNo and the same FacilityID.\n",
    "reason_by_license = data.groupby('LicenseNo')['Reason']\n",
    "reason_by_facility = data.groupby('FacilityID')['Reason']\n",
    "\n",
    "# Positive shift -> earlier rows of the group.\n",
    "data['prev_reason_license'] = reason_by_license.shift(1)\n",
    "data['prev_reason_facility'] = reason_by_facility.shift(1)\n",
    "\n",
    "data['prev_reason_license2'] = reason_by_license.shift(2)\n",
    "data['prev_reason_facility2'] = reason_by_facility.shift(2)\n",
    "\n",
    "# Negative shift -> later rows of the group.\n",
    "data['nxt_reason_license'] = reason_by_license.shift(-1)\n",
    "data['nxt_reason_facility'] = reason_by_facility.shift(-1)\n",
    "\n",
    "data['nxt_reason_license2'] = reason_by_license.shift(-2)\n",
    "data['nxt_reason_facility2'] = reason_by_facility.shift(-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspection_Results of the neighbouring rows (1 and 2 positions before / after)\n",
    "# inside the same LicenseNo, stored as categoricals.\n",
    "result_by_license = data.groupby('LicenseNo')['Inspection_Results']\n",
    "\n",
    "data['prev_result_license'] = result_by_license.shift(1).astype('category')\n",
    "data['prev_result_license2'] = result_by_license.shift(2).astype('category')\n",
    "\n",
    "data['nxt_result_license'] = result_by_license.shift(-1).astype('category')\n",
    "data['nxt_result_license2'] = result_by_license.shift(-2).astype('category')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((147443, 63), (49148, 63))"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows with a known Inspection_Results form the training set; rows where it is\n",
    "# missing form the test set. Both get a fresh 0..n-1 index.\n",
    "has_result = data['Inspection_Results'].notna()\n",
    "train = data[has_result].reset_index(drop=True)\n",
    "test = data[~has_result].reset_index(drop=True)\n",
    "train.shape, test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((147443, 55), (49148, 55), (147443,))"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split the label off the training frame and remove the columns in cols_to_drop.\n",
    "target = train['Inspection_Results']\n",
    "train = train.drop(columns=[*cols_to_drop, 'Inspection_Results'])\n",
    "\n",
    "# Keep the test IDs before restricting the test frame to the training columns.\n",
    "test_ids = test[['ID']]\n",
    "test = test[train.columns]\n",
    "train.shape, test.shape, target.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FACILITY CHANGED</th>\n",
       "      <th>FAIL</th>\n",
       "      <th>FURTHER INSPECTION REQUIRED</th>\n",
       "      <th>INSPECTION OVERRULED</th>\n",
       "      <th>PASS</th>\n",
       "      <th>PASS(CONDITIONAL)</th>\n",
       "      <th>SHUT-DOWN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   FACILITY CHANGED  FAIL  FURTHER INSPECTION REQUIRED  INSPECTION OVERRULED  \\\n",
       "0               0.1   0.1                          0.1                   0.1   \n",
       "1               0.1   0.1                          0.1                   0.1   \n",
       "2               0.1   0.1                          0.1                   0.1   \n",
       "3               0.1   0.1                          0.1                   0.1   \n",
       "4               0.1   0.1                          0.1                   0.1   \n",
       "\n",
       "   PASS  PASS(CONDITIONAL)  SHUT-DOWN  \n",
       "0   0.1                0.1        0.1  \n",
       "1   0.1                0.1        0.1  \n",
       "2   0.1                0.1        0.1  \n",
       "3   0.1                0.1        0.1  \n",
       "4   0.1                0.1        0.1  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the sample submission workbook and preview its first rows\n",
    "# (presumably the expected submission layout -- confirm).\n",
    "samp = pd.read_excel('data/Sample_Submission.xlsx')\n",
    "samp.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type='text/css'>\n",
       ".datatable table.frame { margin-bottom: 0; }\n",
       ".datatable table.frame thead { border-bottom: none; }\n",
       ".datatable table.frame tr.coltypes td {  color: #FFFFFF;  line-height: 6px;  padding: 0 0.5em;}\n",
       ".datatable .boolean { background: #DDDD99; }\n",
       ".datatable .object  { background: #565656; }\n",
       ".datatable .integer { background: #5D9E5D; }\n",
       ".datatable .float   { background: #4040CC; }\n",
       ".datatable .string  { background: #CC4040; }\n",
       ".datatable .row_index {  background: var(--jp-border-color3);  border-right: 1px solid var(--jp-border-color0);  color: var(--jp-ui-font-color3);  font-size: 9px;}\n",
       ".datatable .frame tr.coltypes .row_index {  background: var(--jp-border-color0);}\n",
       ".datatable th:nth-child(2) { padding-left: 12px; }\n",
       ".datatable .hellipsis {  color: var(--jp-cell-editor-border-color);}\n",
       ".datatable .vellipsis {  background: var(--jp-layout-color0);  color: var(--jp-cell-editor-border-color);}\n",
       ".datatable .na {  color: var(--jp-cell-editor-border-color);  font-size: 80%;}\n",
       ".datatable .footer { font-size: 9px; }\n",
       ".datatable .frame_dimensions {  background: var(--jp-border-color3);  border-top: 1px solid var(--jp-border-color0);  color: var(--jp-ui-font-color3);  display: inline-block;  opacity: 0.6;  padding: 1px 10px 1px 5px;}\n",
       "</style>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/yarafath/anaconda3/lib/python3.7/site-packages/lightgbm/__init__.py:48: UserWarning: Starting from version 2.2.1, the library file in distribution wheels for macOS is built by the Apple Clang (Xcode_8.3.3) compiler.\n",
      "This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.\n",
      "Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.\n",
      "You can install the OpenMP library by the following command: ``brew install libomp``.\n",
      "  \"You can install the OpenMP library by the following command: ``brew install libomp``.\", UserWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "from lightgbm import LGBMClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4    0.538873\n",
       "1    0.192997\n",
       "5    0.139925\n",
       "6    0.086074\n",
       "3    0.032195\n",
       "2    0.009631\n",
       "0    0.000305\n",
       "Name: Inspection_Results, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cast the labels to int, then show the relative frequency of each class.\n",
    "target = target.astype(int)\n",
    "target.value_counts(normalize=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "****************************************************************************************************\n",
      "#################### 0 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.134568\tvalid_0's multi_logloss: 0.157084\n",
      "[1000]\ttraining's multi_logloss: 0.0992199\tvalid_0's multi_logloss: 0.143923\n",
      "[1500]\ttraining's multi_logloss: 0.0809752\tvalid_0's multi_logloss: 0.142925\n",
      "Early stopping, best iteration is:\n",
      "[1655]\ttraining's multi_logloss: 0.0765955\tvalid_0's multi_logloss: 0.142901\n",
      "#################### 1 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.134755\tvalid_0's multi_logloss: 0.153377\n",
      "[1000]\ttraining's multi_logloss: 0.0997269\tvalid_0's multi_logloss: 0.138762\n",
      "[1500]\ttraining's multi_logloss: 0.0814688\tvalid_0's multi_logloss: 0.137098\n",
      "[2000]\ttraining's multi_logloss: 0.0685325\tvalid_0's multi_logloss: 0.136749\n",
      "[2500]\ttraining's multi_logloss: 0.0581137\tvalid_0's multi_logloss: 0.136765\n",
      "Early stopping, best iteration is:\n",
      "[2364]\ttraining's multi_logloss: 0.0607023\tvalid_0's multi_logloss: 0.136666\n",
      "#################### 2 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.134177\tvalid_0's multi_logloss: 0.157718\n",
      "[1000]\ttraining's multi_logloss: 0.0988253\tvalid_0's multi_logloss: 0.14419\n",
      "[1500]\ttraining's multi_logloss: 0.0803341\tvalid_0's multi_logloss: 0.142217\n",
      "[2000]\ttraining's multi_logloss: 0.0672654\tvalid_0's multi_logloss: 0.141334\n",
      "[2500]\ttraining's multi_logloss: 0.056998\tvalid_0's multi_logloss: 0.141077\n",
      "[3000]\ttraining's multi_logloss: 0.0485858\tvalid_0's multi_logloss: 0.141159\n",
      "Early stopping, best iteration is:\n",
      "[2810]\ttraining's multi_logloss: 0.0515139\tvalid_0's multi_logloss: 0.140988\n",
      "#################### 3 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.132901\tvalid_0's multi_logloss: 0.162668\n",
      "[1000]\ttraining's multi_logloss: 0.0977421\tvalid_0's multi_logloss: 0.149845\n",
      "[1500]\ttraining's multi_logloss: 0.0794786\tvalid_0's multi_logloss: 0.14801\n",
      "[2000]\ttraining's multi_logloss: 0.0664073\tvalid_0's multi_logloss: 0.147726\n",
      "Early stopping, best iteration is:\n",
      "[1929]\ttraining's multi_logloss: 0.0680552\tvalid_0's multi_logloss: 0.147677\n",
      "#################### 4 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.134767\tvalid_0's multi_logloss: 0.155955\n",
      "[1000]\ttraining's multi_logloss: 0.0999158\tvalid_0's multi_logloss: 0.141921\n",
      "[1500]\ttraining's multi_logloss: 0.0810467\tvalid_0's multi_logloss: 0.139805\n",
      "[2000]\ttraining's multi_logloss: 0.0677529\tvalid_0's multi_logloss: 0.139115\n",
      "Early stopping, best iteration is:\n",
      "[2237]\ttraining's multi_logloss: 0.0625596\tvalid_0's multi_logloss: 0.13898\n",
      "#################### 5 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.133612\tvalid_0's multi_logloss: 0.161188\n",
      "[1000]\ttraining's multi_logloss: 0.0984273\tvalid_0's multi_logloss: 0.148534\n",
      "[1500]\ttraining's multi_logloss: 0.0801414\tvalid_0's multi_logloss: 0.147851\n",
      "Early stopping, best iteration is:\n",
      "[1703]\ttraining's multi_logloss: 0.0743197\tvalid_0's multi_logloss: 0.147772\n",
      "****************************************************************************************************\n",
      "#################### 6 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.131841\tvalid_0's multi_logloss: 0.162557\n",
      "[1000]\ttraining's multi_logloss: 0.0955977\tvalid_0's multi_logloss: 0.1495\n",
      "[1500]\ttraining's multi_logloss: 0.0768694\tvalid_0's multi_logloss: 0.148173\n",
      "Early stopping, best iteration is:\n",
      "[1496]\ttraining's multi_logloss: 0.0769961\tvalid_0's multi_logloss: 0.148163\n",
      "#################### 7 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.13406\tvalid_0's multi_logloss: 0.153914\n",
      "[1000]\ttraining's multi_logloss: 0.0975815\tvalid_0's multi_logloss: 0.140869\n",
      "[1500]\ttraining's multi_logloss: 0.0785577\tvalid_0's multi_logloss: 0.138894\n",
      "[2000]\ttraining's multi_logloss: 0.0652167\tvalid_0's multi_logloss: 0.138424\n",
      "Early stopping, best iteration is:\n",
      "[1985]\ttraining's multi_logloss: 0.0655565\tvalid_0's multi_logloss: 0.138415\n",
      "#################### 8 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.132779\tvalid_0's multi_logloss: 0.158937\n",
      "[1000]\ttraining's multi_logloss: 0.096091\tvalid_0's multi_logloss: 0.145299\n",
      "[1500]\ttraining's multi_logloss: 0.0774413\tvalid_0's multi_logloss: 0.143482\n",
      "[2000]\ttraining's multi_logloss: 0.0642259\tvalid_0's multi_logloss: 0.143093\n",
      "Early stopping, best iteration is:\n",
      "[2012]\ttraining's multi_logloss: 0.0639463\tvalid_0's multi_logloss: 0.143077\n",
      "#################### 9 ####################\n",
      "Training until validation scores don't improve for 250 rounds\n",
      "[500]\ttraining's multi_logloss: 0.132843\tvalid_0's multi_logloss: 0.159744\n",
      "[1000]\ttraining's multi_logloss: 0.0960745\tvalid_0's multi_logloss: 0.146679\n",
      "[1500]\ttraining's multi_logloss: 0.0774406\tvalid_0's multi_logloss: 0.145327\n",
      "[2000]\ttraining's multi_logloss: 0.064322\tvalid_0's multi_logloss: 0.145207\n",
      "Early stopping, best iteration is:\n",
      "[1772]\ttraining's multi_logloss: 0.069892\tvalid_0's multi_logloss: 0.145088\n"
     ]
    }
   ],
   "source": [
    "# Two independently shuffled K-fold schemes (6 + 4 folds) give 10 LightGBM\n",
    "# models; each model's test-set class probabilities are collected in all_result.\n",
    "# shuffle/random_state are passed by keyword because newer scikit-learn\n",
    "# versions make them keyword-only.\n",
    "kf1 = KFold(n_splits=6, shuffle=True, random_state=1001)\n",
    "kf2 = KFold(n_splits=4, shuffle=True, random_state=42)\n",
    "\n",
    "all_result = list()\n",
    "for kf in [kf1, kf2]:\n",
    "    print ('*'*100)\n",
    "    for train_idx, val_idx in kf.split(train, target):\n",
    "        print('#'*20, len(all_result), '#'*20)\n",
    "        # KFold.split yields positional indices, so rows are selected with\n",
    "        # .iloc (label-based .loc is only equivalent for a default RangeIndex).\n",
    "        Xtrain, Ytrain = train.iloc[train_idx], target.iloc[train_idx]\n",
    "        Xval, Yval = train.iloc[val_idx], target.iloc[val_idx]\n",
    "        model = LGBMClassifier(n_estimators=6000, objective='multiclass', learning_rate=0.01)\n",
    "        # Up to 6000 trees; training stops once the eval metric has not improved for 250 rounds.\n",
    "        model.fit(Xtrain, Ytrain, eval_set=[(Xval, Yval), (Xtrain, Ytrain)], verbose=500, early_stopping_rounds=250)\n",
    "        all_result.append(model.predict_proba(test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One more LightGBM model fitted on the whole training set (no validation\n",
    "# split, n_estimators=2000); its test probabilities are added to all_result.\n",
    "model = LGBMClassifier(objective='multiclass', learning_rate=0.01, n_estimators=2000)\n",
    "model.fit(train, target)\n",
    "full_fit_proba = model.predict_proba(test)\n",
    "all_result.append(full_fit_proba)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode object/category columns. The test frame is reindexed to the\n",
    "# training columns: dummy columns that never occur in test are filled with 0\n",
    "# (instead of raising KeyError) and test-only columns are dropped.\n",
    "train = pd.get_dummies(train)\n",
    "test = pd.get_dummies(test).reindex(columns=train.columns, fill_value=0)\n",
    "train.shape, test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of prediction arrays collected in all_result so far\n",
    "len(all_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "from catboost import CatBoostClassifier\n",
    "\n",
    "# Add CatBoost models to the blend: one per fold of a shuffled 5-fold split,\n",
    "# each appending its test-set class probabilities to all_result.\n",
    "# shuffle/random_state are passed by keyword because newer scikit-learn\n",
    "# versions make them keyword-only.\n",
    "kf = KFold(n_splits=5, shuffle=True, random_state=330)\n",
    "for train_idx, val_idx in kf.split(train, target):\n",
    "    print('#'*20, len(all_result), '#'*20)\n",
    "    # KFold.split yields positional indices, so rows are selected with .iloc\n",
    "    # (label-based .loc is only equivalent for a default RangeIndex).\n",
    "    Xtrain, Ytrain = train.iloc[train_idx], target.iloc[train_idx]\n",
    "    Xval, Yval = train.iloc[val_idx], target.iloc[val_idx]\n",
    "    model = CatBoostClassifier(objective='MultiClass', learning_rate=0.04, iterations=10000, early_stopping_rounds=250)\n",
    "    model.fit(Xtrain, Ytrain, eval_set=(Xval, Yval), verbose=400)\n",
    "    all_result.append(model.predict_proba(test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FACILITY CHANGED</th>\n",
       "      <th>FAIL</th>\n",
       "      <th>FURTHER INSPECTION REQUIRED</th>\n",
       "      <th>INSPECTION OVERRULED</th>\n",
       "      <th>PASS</th>\n",
       "      <th>PASS(CONDITIONAL)</th>\n",
       "      <th>SHUT-DOWN</th>\n",
       "      <th>ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.033621e-09</td>\n",
       "      <td>0.003353</td>\n",
       "      <td>0.000004</td>\n",
       "      <td>0.000014</td>\n",
       "      <td>0.995926</td>\n",
       "      <td>0.000685</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>18328605468650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.972987e-08</td>\n",
       "      <td>0.005376</td>\n",
       "      <td>0.000006</td>\n",
       "      <td>0.000024</td>\n",
       "      <td>0.993546</td>\n",
       "      <td>0.000940</td>\n",
       "      <td>0.000107</td>\n",
       "      <td>12864736170514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.472968e-08</td>\n",
       "      <td>0.001780</td>\n",
       "      <td>0.000001</td>\n",
       "      <td>0.000013</td>\n",
       "      <td>0.997467</td>\n",
       "      <td>0.000724</td>\n",
       "      <td>0.000015</td>\n",
       "      <td>19070455936767</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.985305e-06</td>\n",
       "      <td>0.183724</td>\n",
       "      <td>0.000029</td>\n",
       "      <td>0.000781</td>\n",
       "      <td>0.742533</td>\n",
       "      <td>0.071000</td>\n",
       "      <td>0.001930</td>\n",
       "      <td>39558576524615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.296969e-08</td>\n",
       "      <td>0.969635</td>\n",
       "      <td>0.000003</td>\n",
       "      <td>0.000023</td>\n",
       "      <td>0.002285</td>\n",
       "      <td>0.028006</td>\n",
       "      <td>0.000049</td>\n",
       "      <td>15149149207234</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   FACILITY CHANGED      FAIL  FURTHER INSPECTION REQUIRED  \\\n",
       "0      8.033621e-09  0.003353                     0.000004   \n",
       "1      4.972987e-08  0.005376                     0.000006   \n",
       "2      1.472968e-08  0.001780                     0.000001   \n",
       "3      1.985305e-06  0.183724                     0.000029   \n",
       "4      2.296969e-08  0.969635                     0.000003   \n",
       "\n",
       "   INSPECTION OVERRULED      PASS  PASS(CONDITIONAL)  SHUT-DOWN  \\\n",
       "0              0.000014  0.995926           0.000685   0.000018   \n",
       "1              0.000024  0.993546           0.000940   0.000107   \n",
       "2              0.000013  0.997467           0.000724   0.000015   \n",
       "3              0.000781  0.742533           0.071000   0.001930   \n",
       "4              0.000023  0.002285           0.028006   0.000049   \n",
       "\n",
       "               ID  \n",
       "0  18328605468650  \n",
       "1  12864736170514  \n",
       "2  19070455936767  \n",
       "3  39558576524615  \n",
       "4  15149149207234  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Blend: arithmetic mean over all collected class-probability arrays.\n",
    "result = pd.DataFrame(np.mean(all_result, axis=0), columns=samp.columns)\n",
    "# Attach IDs by position rather than by index label: `result` has a fresh\n",
    "# 0..n-1 index, so label alignment would misassign IDs (or insert NaN) if\n",
    "# test_ids does not carry a default index. A length mismatch now raises.\n",
    "# Assumes test_ids rows are in the same order as `test` - TODO confirm.\n",
    "result['ID'] = test_ids.ID.values\n",
    "result[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>idx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47571900570810</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19685766253655</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37885701847801</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>39946284936679</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46793577796409</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               ID  idx\n",
       "0  47571900570810    0\n",
       "1  19685766253655    1\n",
       "2  37885701847801    2\n",
       "3  39946284936679    3\n",
       "4  46793577796409    4"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-read only the ID column of the test workbook and store each row's\n",
    "# position in the file as 'idx'.\n",
    "test_idx = pd.read_excel('data/Data_Test.xlsx', usecols=['ID'])\n",
    "test_idx = test_idx.assign(idx=test_idx.index.values)\n",
    "test_idx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FACILITY CHANGED</th>\n",
       "      <th>FAIL</th>\n",
       "      <th>FURTHER INSPECTION REQUIRED</th>\n",
       "      <th>INSPECTION OVERRULED</th>\n",
       "      <th>PASS</th>\n",
       "      <th>PASS(CONDITIONAL)</th>\n",
       "      <th>SHUT-DOWN</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>idx</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.139328e-08</td>\n",
       "      <td>0.748230</td>\n",
       "      <td>8.433342e-05</td>\n",
       "      <td>0.048449</td>\n",
       "      <td>0.002757</td>\n",
       "      <td>0.200318</td>\n",
       "      <td>0.000162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.104622e-08</td>\n",
       "      <td>0.000812</td>\n",
       "      <td>1.419760e-06</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>0.997948</td>\n",
       "      <td>0.001198</td>\n",
       "      <td>0.000023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5.291214e-08</td>\n",
       "      <td>0.002663</td>\n",
       "      <td>1.537605e-03</td>\n",
       "      <td>0.890845</td>\n",
       "      <td>0.093888</td>\n",
       "      <td>0.002159</td>\n",
       "      <td>0.008907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.539387e-07</td>\n",
       "      <td>0.039941</td>\n",
       "      <td>8.996900e-06</td>\n",
       "      <td>0.000060</td>\n",
       "      <td>0.025217</td>\n",
       "      <td>0.001593</td>\n",
       "      <td>0.933179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.678241e-09</td>\n",
       "      <td>0.000387</td>\n",
       "      <td>8.526741e-07</td>\n",
       "      <td>0.000014</td>\n",
       "      <td>0.998425</td>\n",
       "      <td>0.001155</td>\n",
       "      <td>0.000018</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     FACILITY CHANGED      FAIL  FURTHER INSPECTION REQUIRED  \\\n",
       "idx                                                            \n",
       "0        8.139328e-08  0.748230                 8.433342e-05   \n",
       "1        1.104622e-08  0.000812                 1.419760e-06   \n",
       "2        5.291214e-08  0.002663                 1.537605e-03   \n",
       "3        1.539387e-07  0.039941                 8.996900e-06   \n",
       "4        4.678241e-09  0.000387                 8.526741e-07   \n",
       "\n",
       "     INSPECTION OVERRULED      PASS  PASS(CONDITIONAL)  SHUT-DOWN  \n",
       "idx                                                                \n",
       "0                0.048449  0.002757           0.200318   0.000162  \n",
       "1                0.000018  0.997948           0.001198   0.000023  \n",
       "2                0.890845  0.093888           0.002159   0.008907  \n",
       "3                0.000060  0.025217           0.001593   0.933179  \n",
       "4                0.000014  0.998425           0.001155   0.000018  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Join the blended predictions to the file positions by ID, then order the\n",
    "# rows by 'idx' and discard the ID column.\n",
    "merged = result.merge(test_idx, on='ID')\n",
    "sol = merged.set_index('idx').sort_index().drop(columns='ID')\n",
    "sol.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# index=False omits the 'idx' index, so only the class-probability columns\n",
    "# are written, in the idx-sorted row order of `sol`.\n",
    "sol.to_excel('solution_lgb10models.xlsx',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
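    "# Disabled alternative blend: geometric mean of the model probabilities\n",
    "# (product of every array raised to 1/len(all_result)), with each row then\n",
    "# rescaled to sum to 1, written to 'solution10.xlsx'.\n",
    "# all_result_tm = np.array(all_result)**(1/len(all_result))\n",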
    "# result = pd.DataFrame(np.prod(all_result_tm,axis=0), columns=samp.columns)\n",
    "# result = (result.T/result.sum(axis=1)).T\n",
    "# result['ID'] = test_ids.ID\n",
    "# test_idx = pd.read_excel('data/Data_Test.xlsx', usecols=['ID'])\n",
    "# test_idx['idx'] = test_idx.index.values\n",
    "# sol = pd.merge(result, test_idx, on='ID').drop(columns=['ID']).set_index('idx').sort_index()\n",
    "# sol.to_excel('solution10.xlsx',index=False)\n",
    "# sol[:5]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
